From 64bc1c39054500773e750e48371768a219827d9f Mon Sep 17 00:00:00 2001 From: Lloyd McCarthy Date: Tue, 16 Apr 2019 11:41:20 -0400 Subject: [PATCH 1/9] Removed an unused class --- .../cbioportal/web/GenePanelController.java | 8 +++---- .../GenePanelMultipleStudyFilter.java | 21 ------------------- ...volvedCancerStudyExtractorInterceptor.java | 21 +++++++++---------- .../web/GenePanelControllerTest.java | 5 +---- 4 files changed, 15 insertions(+), 40 deletions(-) delete mode 100644 web/src/main/java/org/cbioportal/web/parameter/GenePanelMultipleStudyFilter.java diff --git a/web/src/main/java/org/cbioportal/web/GenePanelController.java b/web/src/main/java/org/cbioportal/web/GenePanelController.java index 7d8bbce1fbb..30aaa33740c 100644 --- a/web/src/main/java/org/cbioportal/web/GenePanelController.java +++ b/web/src/main/java/org/cbioportal/web/GenePanelController.java @@ -11,7 +11,6 @@ import org.cbioportal.web.config.annotation.PublicApi; import org.cbioportal.web.parameter.Direction; import org.cbioportal.web.parameter.GenePanelDataFilter; -import org.cbioportal.web.parameter.GenePanelMultipleStudyFilter; import org.cbioportal.web.parameter.HeaderKeyConstants; import org.cbioportal.web.parameter.PagingConstants; import org.cbioportal.web.parameter.Projection; @@ -132,15 +131,16 @@ public ResponseEntity> fetchGenePanelDataInMultipleMolecular @ApiIgnore // prevent reference to this attribute in the swagger-ui interface @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, @ApiIgnore // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. 
- @Valid @RequestAttribute(required = false, value = "interceptedGenePanelMultipleStudyFilter") GenePanelMultipleStudyFilter interceptedGenePanelMultipleStudyFilter, + @RequestAttribute(required = false, value = "interceptedGenePanelSampleMolecularIdentifiers") List interceptedGenePanelSampleMolecularIdentifiers, @ApiParam(required = true, value = "List of Molecular Profile ID and Sample ID pairs") - @Valid @RequestBody(required = false) GenePanelMultipleStudyFilter genePanelMultipleStudyFilter) { + @Size(min = 1, max = PagingConstants.MAX_PAGE_SIZE) + @RequestBody(required = false) List sampleMolecularIdentifiers) { List molecularProfileIds = new ArrayList<>(); List sampleIds = new ArrayList<>(); for (SampleMolecularIdentifier sampleMolecularIdentifier : - interceptedGenePanelMultipleStudyFilter.getSampleMolecularIdentifiers()) { + interceptedGenePanelSampleMolecularIdentifiers) { molecularProfileIds.add(sampleMolecularIdentifier.getMolecularProfileId()); sampleIds.add(sampleMolecularIdentifier.getSampleId()); diff --git a/web/src/main/java/org/cbioportal/web/parameter/GenePanelMultipleStudyFilter.java b/web/src/main/java/org/cbioportal/web/parameter/GenePanelMultipleStudyFilter.java deleted file mode 100644 index 286fae0118d..00000000000 --- a/web/src/main/java/org/cbioportal/web/parameter/GenePanelMultipleStudyFilter.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.cbioportal.web.parameter; - -import javax.validation.constraints.NotNull; -import javax.validation.constraints.Size; -import java.util.List; -import java.io.Serializable; - -public class GenePanelMultipleStudyFilter implements Serializable { - - @NotNull - @Size(min = 1, max = PagingConstants.MAX_PAGE_SIZE) - private List sampleMolecularIdentifiers; - - public List getSampleMolecularIdentifiers() { - return sampleMolecularIdentifiers; - } - - public void setSampleMolecularIdentifiers(List sampleMolecularIdentifiers) { - this.sampleMolecularIdentifiers = sampleMolecularIdentifiers; - } -} diff --git 
a/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java b/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java index 1c252032af4..7fd5e6c24fe 100644 --- a/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java +++ b/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java @@ -47,7 +47,6 @@ import org.cbioportal.web.parameter.ClinicalDataCountFilter; import org.cbioportal.web.parameter.ClinicalDataIdentifier; import org.cbioportal.web.parameter.ClinicalDataMultiStudyFilter; -import org.cbioportal.web.parameter.GenePanelMultipleStudyFilter; import org.cbioportal.web.parameter.GroupFilter; import org.cbioportal.web.parameter.MolecularDataMultipleStudyFilter; import org.cbioportal.web.parameter.MolecularProfileCasesGroupFilter; @@ -114,7 +113,7 @@ public class InvolvedCancerStudyExtractorInterceptor extends HandlerInterceptorA } else if (requestPathInfo.equals(CLINICAL_DATA_FETCH_PATH)) { return extractAttributesFromClinicalDataMultiStudyFilter(wrappedRequest); } else if (requestPathInfo.equals(GENE_PANEL_DATA_FETCH_PATH)) { - return extractAttributesFromGenePanelMultipleStudyFilter(wrappedRequest); + return extractAttributesFromGenePanelSampleMolecularIdentifiers(wrappedRequest); } else if (requestPathInfo.equals(MOLECULAR_DATA_MULTIPLE_STUDY_FETCH_PATH)) { return extractAttributesFromMolecularDataMultipleStudyFilter(wrappedRequest); } else if (requestPathInfo.equals(MUTATION_MULTIPLE_STUDY_FETCH_PATH)) { @@ -284,27 +283,27 @@ private Collection extractCancerStudyIdsFromClinicalDataMultiStudyFilter return studyIdSet; } - private boolean extractAttributesFromGenePanelMultipleStudyFilter(HttpServletRequest request) { + private boolean extractAttributesFromGenePanelSampleMolecularIdentifiers(HttpServletRequest request) { try { - GenePanelMultipleStudyFilter genePanelMultipleStudyFilter = objectMapper.readValue(request.getReader(), 
GenePanelMultipleStudyFilter.class); - LOG.debug("extracted genePanelMultipleStudyFilter: " + genePanelMultipleStudyFilter.toString()); - LOG.debug("setting interceptedGenePanelMultipleStudyFilter to " + genePanelMultipleStudyFilter); - request.setAttribute("interceptedGenePanelMultipleStudyFilter", genePanelMultipleStudyFilter); + List sampleMolecularIdentifiers = objectMapper.readValue(request.getReader(), List.class); + LOG.debug("extracted sampleMolecularIdentifiers: " + sampleMolecularIdentifiers.toString()); + LOG.debug("setting interceptedGenePanelSampleMolecularIdentifers to " + sampleMolecularIdentifiers); + request.setAttribute("interceptedGenePanelSampleMolecularIdentifiers", sampleMolecularIdentifiers); if (cacheMapUtil.hasCacheEnabled()) { - Collection cancerStudyIdCollection = extractCancerStudyIdsFromGenePanelMultipleStudyFilter(genePanelMultipleStudyFilter); + Collection cancerStudyIdCollection = extractCancerStudyIdsFromGenePanelSampleMolecularIdentifiers(sampleMolecularIdentifiers); LOG.debug("setting involvedCancerStudies to " + cancerStudyIdCollection); request.setAttribute("involvedCancerStudies", cancerStudyIdCollection); } } catch (Exception e) { - LOG.error("exception thrown during extraction of genePanelMultipleStudyFilter: " + e); + LOG.error("exception thrown during extraction of genePanelSampleMolecularIdentifiers: " + e); return false; } return true; } - private Collection extractCancerStudyIdsFromGenePanelMultipleStudyFilter(GenePanelMultipleStudyFilter genePanelMultipleStudyFilter) { + private Collection extractCancerStudyIdsFromGenePanelSampleMolecularIdentifiers(List sampleMolecularIdentifiers) { Set studyIdSet = new HashSet(); - extractCancerStudyIdsFromSampleMolecularIdentifiers(genePanelMultipleStudyFilter.getSampleMolecularIdentifiers(), studyIdSet); + extractCancerStudyIdsFromSampleMolecularIdentifiers(sampleMolecularIdentifiers, studyIdSet); return studyIdSet; } diff --git 
a/web/src/test/java/org/cbioportal/web/GenePanelControllerTest.java b/web/src/test/java/org/cbioportal/web/GenePanelControllerTest.java index 0a0850603fb..2204385c83c 100644 --- a/web/src/test/java/org/cbioportal/web/GenePanelControllerTest.java +++ b/web/src/test/java/org/cbioportal/web/GenePanelControllerTest.java @@ -8,7 +8,6 @@ import org.cbioportal.service.GenePanelService; import org.cbioportal.service.exception.GenePanelNotFoundException; import org.cbioportal.web.parameter.GenePanelDataFilter; -import org.cbioportal.web.parameter.GenePanelMultipleStudyFilter; import org.cbioportal.web.parameter.HeaderKeyConstants; import org.cbioportal.web.parameter.SampleMolecularIdentifier; import org.hamcrest.Matchers; @@ -255,7 +254,6 @@ public void fetchGenePanelData() throws Exception { Mockito.when(genePanelService.fetchGenePanelDataInMultipleMolecularProfiles(Mockito.anyListOf(String.class), Mockito.anyListOf(String.class))).thenReturn(genePanelDataList); - GenePanelMultipleStudyFilter genePanelMultipleStudyFilter = new GenePanelMultipleStudyFilter(); List sampleMolecularIdentifiers = new ArrayList<>(); SampleMolecularIdentifier sampleMolecularIdentifier1 = new SampleMolecularIdentifier(); sampleMolecularIdentifier1.setMolecularProfileId(TEST_MOLECULAR_PROFILE_ID_1); @@ -265,13 +263,12 @@ public void fetchGenePanelData() throws Exception { sampleMolecularIdentifier2.setMolecularProfileId(TEST_MOLECULAR_PROFILE_ID_2); sampleMolecularIdentifier2.setSampleId(TEST_SAMPLE_ID_2); sampleMolecularIdentifiers.add(sampleMolecularIdentifier2); - genePanelMultipleStudyFilter.setSampleMolecularIdentifiers(sampleMolecularIdentifiers); mockMvc.perform(MockMvcRequestBuilders.post( "/gene-panel-data/fetch") .accept(MediaType.APPLICATION_JSON) .contentType(MediaType.APPLICATION_JSON) - .content(objectMapper.writeValueAsString(genePanelMultipleStudyFilter))) + .content(objectMapper.writeValueAsString(sampleMolecularIdentifiers))) .andExpect(MockMvcResultMatchers.status().isOk()) 
.andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON)) .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(2))) From 2bf032b30d1375401c829971f0b80ceb5a4a053f Mon Sep 17 00:00:00 2001 From: JianJiong Gao Date: Wed, 3 Jul 2019 17:28:24 -0400 Subject: [PATCH 2/9] Update InvolvedCancerStudyExtractorInterceptor.java --- .../web/util/InvolvedCancerStudyExtractorInterceptor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java b/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java index 7fd5e6c24fe..d84fad60c94 100644 --- a/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java +++ b/web/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java @@ -285,7 +285,7 @@ private Collection extractCancerStudyIdsFromClinicalDataMultiStudyFilter private boolean extractAttributesFromGenePanelSampleMolecularIdentifiers(HttpServletRequest request) { try { - List sampleMolecularIdentifiers = objectMapper.readValue(request.getReader(), List.class); + List sampleMolecularIdentifiers = Arrays.asList(objectMapper.readValue(request.getReader(), SampleMolecularIdentifier[].class)); LOG.debug("extracted sampleMolecularIdentifiers: " + sampleMolecularIdentifiers.toString()); LOG.debug("setting interceptedGenePanelSampleMolecularIdentifers to " + sampleMolecularIdentifiers); request.setAttribute("interceptedGenePanelSampleMolecularIdentifiers", sampleMolecularIdentifiers); From 59f7f12c4d984aaa5b9da90f7aaf2636967b364f Mon Sep 17 00:00:00 2001 From: Kelsey Zhu Date: Fri, 27 Apr 2018 12:58:33 -0400 Subject: [PATCH 3/9] support multiple reference genomes 2nd try use MySql reserved keywords with a quote adjust column sizes in mutation_event table to be compatible with UTF8 add new options (species and reference genome build) to PYTHON importer/validate 
scripts check reference_genome_id in seg file and NCBI_Build (if filled) in MAF file see if agree with reference genome in cancer study meta file add a new API endpoint to fetch reference genome genes by entrez gene ids and hugo gene symbols --- .../persistence/GenePanelMapperLegacy.xml | 13 +- .../portal/persistence/StudyMapperLegacy.xml | 2 +- core/pom.xml | 10 +- .../mskcc/cbio/portal/dao/DaoCancerStudy.java | 19 +- .../org/mskcc/cbio/portal/dao/DaoGene.java | 12 +- .../cbio/portal/dao/DaoGeneOptimized.java | 3 +- .../cbio/portal/dao/DaoReferenceGenome.java | 299 ++++++++++++ .../portal/dao/DaoReferenceGenomeGene.java | 255 ++++++++++ .../org/mskcc/cbio/portal/dao/JdbcUtil.java | 24 +- .../mskcc/cbio/portal/model/CancerStudy.java | 7 + .../cbio/portal/model/CanonicalGene.java | 9 - .../portal/model/CopyNumberSegmentFile.java | 1 + .../cbio/portal/model/ReferenceGenome.java | 178 +++++++ .../portal/model/ReferenceGenomeGene.java | 129 +++++ .../scripts/ImportCopyNumberSegmentData.java | 11 +- .../scripts/ImportExtendedMutationData.java | 11 + .../cbio/portal/scripts/ImportGeneData.java | 288 +++++++++-- .../portal/scripts/ImportReferenceGenome.java | 184 +++++++ .../portal/scripts/ImportTabDelimData.java | 1 - .../mskcc/cbio/portal/servlet/CnaJSON.java | 1 - .../portal/servlet/GetCoExpressionJSON.java | 4 +- .../cbio/portal/servlet/MutationsJSON.java | 3 - .../cbio/portal/util/CancerStudyReader.java | 6 + .../portal/util/EnrichmentsAnalysisUtil.java | 34 +- .../main/scripts/importer/cbio_importer.py | 185 ++++++++ .../scripts/importer/cbioportal_common.py | 5 +- core/src/main/scripts/importer/metaImport.py | 12 +- .../src/main/scripts/importer/validateData.py | 107 +++-- .../cbio/portal/dao/TestDaoCancerStudy.java | 4 + .../portal/scripts/TestImportGeneData.java | 9 +- .../scripts/TestImportReferenceGenome.java | 79 +++ core/src/test/resources/cancer_study.txt | 3 +- core/src/test/resources/reference_genomes.txt | 4 + core/src/test/resources/seed_mini.sql | 73 
++- core/src/test/resources/testCancerStudy.txt | 1 + .../resources/adjust_col_size_to_utf8.sql | 16 + db-scripts/src/main/resources/cgds.sql | 34 +- db-scripts/src/main/resources/migration.sql | 12 +- docs/File-Formats.md | 2 +- docs/Import-reference-genome.md | 49 ++ docs/Updating-gene-and-gene_alias-tables.md | 13 +- docs/Using-the-dataset-validator.md | 41 +- docs/Using-the-metaImport-script.md | 16 +- .../org/cbioportal/model/CancerStudy.java | 5 + .../main/java/org/cbioportal/model/Gene.java | 26 +- .../java/org/cbioportal/model/Mutation.java | 5 + .../org/cbioportal/model/ReferenceGenome.java | 176 +++++++ .../cbioportal/model/ReferenceGenomeGene.java | 90 ++++ .../persistence/GeneRepository.java | 2 + .../ReferenceGenomeGeneRepository.java | 43 ++ .../persistence/mybatis/GeneMapper.java | 2 + .../mybatis/GeneMyBatisRepository.java | 5 + .../MolecularDataMyBatisRepository.java | 8 +- .../mybatis/ReferenceGenomeGeneMapper.java | 13 + .../ReferenceGenomeGeneMyBatisRepository.java | 47 ++ .../mybatis/DiscreteCopyNumberMapper.xml | 7 +- .../persistence/mybatis/GeneMapper.xml | 12 +- .../mybatis/MolecularProfileMapper.xml | 6 + .../persistence/mybatis/MutationMapper.xml | 1 + .../persistence/mybatis/PatientMapper.xml | 2 + .../mybatis/ReferenceGenomeGeneMapper.xml | 110 +++++ .../persistence/mybatis/SampleListMapper.xml | 3 + .../persistence/mybatis/SampleMapper.xml | 1 + .../persistence/mybatis/SecurityMapper.xml | 2 +- .../SignificantlyMutatedGeneMapper.xml | 2 +- .../persistence/mybatis/StudyMapper.xml | 4 +- ...screteCopyNumberMyBatisRepositoryTest.java | 14 +- .../mybatis/GeneMyBatisRepositoryTest.java | 6 - ...MolecularProfileMyBatisRepositoryTest.java | 8 +- .../MutationMyBatisRepositoryTest.java | 1 - .../mybatis/PatientMyBatisRepositoryTest.java | 8 +- ...erenceGenomeGeneMyBatisRepositoryTest.java | 109 +++++ .../SampleListMyBatisRepositoryTest.java | 12 +- .../mybatis/SampleMyBatisRepositoryTest.java | 12 +- .../mybatis/StudyMyBatisRepositoryTest.java | 
16 +- .../src/test/resources/testSql.sql | 64 ++- pom.xml | 4 +- .../org/cbioportal/service/GeneService.java | 2 + .../service/ReferenceGenomeGeneService.java | 12 + .../service/impl/CoExpressionServiceImpl.java | 18 +- .../impl/ExpressionEnrichmentServiceImpl.java | 13 +- .../service/impl/GeneServiceImpl.java | 15 +- .../service/impl/MutationServiceImpl.java | 6 +- .../impl/ReferenceGenomeGeneServiceImpl.java | 46 ++ .../util/AlterationEnrichmentUtil.java | 2 +- .../service/util/ChromosomeCalculator.java | 8 +- .../service/impl/BaseServiceImplTest.java | 7 + .../impl/CoExpressionServiceImplTest.java | 10 +- .../DiscreteCopyNumberServiceImplTest.java | 7 +- .../ExpressionEnrichmentServiceImplTest.java | 8 +- .../service/impl/GeneServiceImplTest.java | 21 +- .../service/impl/MutationServiceImplTest.java | 17 +- .../ReferenceGenomeGeneServiceImplTest.java | 104 ++++ .../util/AlterationEnrichmentUtilTest.java | 9 +- .../web/ReferenceGenomeGeneController.java | 98 ++++ .../web/util/ClinicalDataEnrichmentUtil.java | 2 +- .../web/DiscreteCopyNumberControllerTest.java | 17 +- .../cbioportal/web/GeneControllerTest.java | 22 +- .../web/MutationControllerTest.java | 17 +- .../ReferenceGenomeGeneControllerTest.java | 142 ++++++ .../weblegacy/ApiControllerTest.java | 3 - .../GenePanelControllerLegacyTest.java | 2 - .../StructuralVariantControllerTest.java | 449 ++++++++++++++++++ 103 files changed, 3604 insertions(+), 448 deletions(-) create mode 100644 core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java create mode 100644 core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java create mode 100644 core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java create mode 100644 core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java create mode 100644 core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java create mode 100644 core/src/main/scripts/importer/cbio_importer.py create mode 100644 
core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java create mode 100644 core/src/test/resources/reference_genomes.txt create mode 100644 db-scripts/src/main/resources/adjust_col_size_to_utf8.sql create mode 100644 docs/Import-reference-genome.md create mode 100644 model/src/main/java/org/cbioportal/model/ReferenceGenome.java create mode 100644 model/src/main/java/org/cbioportal/model/ReferenceGenomeGene.java create mode 100644 persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java create mode 100644 persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.java create mode 100644 persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java create mode 100644 persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml create mode 100644 persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepositoryTest.java create mode 100644 service/src/main/java/org/cbioportal/service/ReferenceGenomeGeneService.java create mode 100644 service/src/main/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImpl.java create mode 100644 service/src/test/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImplTest.java create mode 100644 web/src/main/java/org/cbioportal/web/ReferenceGenomeGeneController.java create mode 100644 web/src/test/java/org/cbioportal/web/ReferenceGenomeGeneControllerTest.java create mode 100644 web/src/test/java/org/cbioportal/weblegacy/StructuralVariantControllerTest.java diff --git a/business/src/main/resources/org/mskcc/cbio/portal/persistence/GenePanelMapperLegacy.xml b/business/src/main/resources/org/mskcc/cbio/portal/persistence/GenePanelMapperLegacy.xml index d324789e657..f5204c9a011 100644 --- 
a/business/src/main/resources/org/mskcc/cbio/portal/persistence/GenePanelMapperLegacy.xml +++ b/business/src/main/resources/org/mskcc/cbio/portal/persistence/GenePanelMapperLegacy.xml @@ -69,7 +69,6 @@ - @@ -128,8 +127,7 @@ gene_panel.DESCRIPTION as description, gene.ENTREZ_GENE_ID as entrezGeneId, gene.HUGO_GENE_SYMBOL as hugoGeneSymbol, - gene.TYPE as type, - gene.CYTOBAND as cytoband + gene.TYPE as type from gene_panel left join gene_panel_list on gene_panel.INTERNAL_ID = gene_panel_list.INTERNAL_ID left join gene on gene_panel_list.GENE_ID = gene.ENTREZ_GENE_ID @@ -149,8 +147,7 @@ select gene.ENTREZ_GENE_ID, gene.HUGO_GENE_SYMBOL, - gene.TYPE, - gene.CYTOBAND + gene.TYPE from gene gene.HUGO_GENE_SYMBOL = #{symbol} @@ -162,8 +159,7 @@ select gene.ENTREZ_GENE_ID, gene.HUGO_GENE_SYMBOL, - gene.TYPE, - gene.CYTOBAND + gene.TYPE from gene gene.ENTREZ_GENE_ID = #{geneId} @@ -174,8 +170,7 @@ select gene.ENTREZ_GENE_ID, gene.HUGO_GENE_SYMBOL, - gene.TYPE, - gene.CYTOBAND + gene.TYPE from gene_alias inner join gene on gene_alias.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID diff --git a/business/src/main/resources/org/mskcc/cbio/portal/persistence/StudyMapperLegacy.xml b/business/src/main/resources/org/mskcc/cbio/portal/persistence/StudyMapperLegacy.xml index 154963551a0..062fa134e8f 100644 --- a/business/src/main/resources/org/mskcc/cbio/portal/persistence/StudyMapperLegacy.xml +++ b/business/src/main/resources/org/mskcc/cbio/portal/persistence/StudyMapperLegacy.xml @@ -14,7 +14,7 @@ DESCRIPTION as description, PMID as pmid, CITATION as citation, - GROUPS as groups, + `GROUPS` as `groups`, CANCER_STUDY_ID as internal_id from cancer_study where CANCER_STUDY_IDENTIFIER in #{item} diff --git a/core/pom.xml b/core/pom.xml index fd4aeeea528..905411c7f8f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -65,7 +65,7 @@ org.apache.commons commons-dbcp2 - 2.1.1 + 2.4.0 commons-fileupload @@ -293,7 +293,7 @@ 2.16 always - true + log4j.configuration @@ -353,7 +353,7 @@ mysql 
mysql-connector-java - 5.0.3 + 8.0.11 @@ -369,7 +369,7 @@ ${db.test.url} ${db.test.username} ${db.test.password} - SET storage_engine=INNODB + SET default_storage_engine=INNODB SET SESSION sql_mode = 'ANSI_QUOTES' ${project.build.testOutputDirectory}/cgds.sql @@ -377,7 +377,7 @@ UTF-8 - characterEncoding=utf8, connectionCollation=utf8_general_ci + ${skipTests} diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java index c9aca1aa5e6..eddc1ba0f05 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java @@ -298,7 +298,7 @@ public static void addCancerStudy(CancerStudy cancerStudy, boolean overwrite) th pstmt = con.prepareStatement("INSERT INTO cancer_study " + "( `CANCER_STUDY_IDENTIFIER`, `NAME`, " + "`DESCRIPTION`, `PUBLIC`, `TYPE_OF_CANCER_ID`, " - + "`PMID`, `CITATION`, `GROUPS`, `SHORT_NAME`, `STATUS`, `IMPORT_DATE` ) VALUES (?,?,?,?,?,?,?,?,?,?,?)", + + "`PMID`, `CITATION`, `GROUPS`, `SHORT_NAME`, `STATUS`, `IMPORT_DATE`,`REFERENCE_GENOME_ID` ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)", Statement.RETURN_GENERATED_KEYS); pstmt.setString(1, stableId); pstmt.setString(2, cancerStudy.getName()); @@ -319,6 +319,13 @@ public static void addCancerStudy(CancerStudy cancerStudy, boolean overwrite) th //TODO - use this field in parts of the system that build up the list of studies to display in home page: pstmt.setInt(10, Status.UNAVAILABLE.ordinal()); pstmt.setDate(11, java.sql.Date.valueOf(LocalDate.now())); + try { + ReferenceGenome referenceGenome = DaoReferenceGenome.getReferenceGenomeByGenomeName(cancerStudy.getReferenceGenome()); + pstmt.setInt(12, referenceGenome.getReferenceGenomeId()); + } + catch (NullPointerException e) { + pstmt.setInt(12,1); //#TODO default reference genome to use + } pstmt.executeUpdate(); rs = pstmt.getGeneratedKeys(); if (rs.next()) { @@ -356,7 +363,7 @@ public static 
void addCancerStudyTags(CancerStudyTags cancerStudyTags) throws Da /** * Return the cancerStudy identified by the internal cancer study ID, if it exists. * - * @param cancerStudyID Internal (int) Cancer Study ID. + * @param internalId Internal (int) Cancer Study ID. * @return Cancer Study Object, or null if there's no such study. */ public static CancerStudy getCancerStudyByInternalId(int internalId) throws DaoException { @@ -366,7 +373,7 @@ public static CancerStudy getCancerStudyByInternalId(int internalId) throws DaoE /** * Returns the cancerStudy identified by the stable identifier, if it exists. * - * @param cancerStudyStableId Cancer Study Stable ID. + * @param stableId Cancer Study Stable ID. * @return the CancerStudy, or null if there's no such study. */ public static CancerStudy getCancerStudyByStableId(String stableId) throws DaoException { @@ -624,6 +631,12 @@ private static CancerStudy extractCancerStudy(ResultSet rs) throws SQLException cancerStudy.setShortName(rs.getString("SHORT_NAME")); cancerStudy.setInternalId(rs.getInt("CANCER_STUDY_ID")); cancerStudy.setImportDate(rs.getDate("IMPORT_DATE")); + try { + cancerStudy.setReferenceGenome(DaoReferenceGenome.getReferenceGenomeByInternalId( + rs.getInt("REFERENCE_GENOME_ID")).getGenomeName()); + } catch (DaoException e) { + cancerStudy.setReferenceGenome(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + } return cancerStudy; } diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGene.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGene.java index 06df00fed2f..019950e9303 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGene.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGene.java @@ -91,11 +91,10 @@ public static int updateGene(CanonicalGene gene) throws DaoException { int rows = 0; con = JdbcUtil.getDbConnection(DaoGene.class); pstmt = con.prepareStatement - ("UPDATE gene SET `HUGO_GENE_SYMBOL`=?, `TYPE`=?,`CYTOBAND`=? 
WHERE `ENTREZ_GENE_ID`=?"); + ("UPDATE gene SET `HUGO_GENE_SYMBOL`=?, `TYPE`=? WHERE `ENTREZ_GENE_ID`=?"); pstmt.setString(1, gene.getHugoGeneSymbolAllCaps()); pstmt.setString(2, gene.getType()); - pstmt.setString(3, gene.getCytoband()); - pstmt.setLong(4, gene.getEntrezGeneId()); + pstmt.setLong(3, gene.getEntrezGeneId()); rows += pstmt.executeUpdate(); if (rows != 1) { ProgressMonitor.logWarning("No change for " + gene.getEntrezGeneId() + " " + gene.getHugoGeneSymbolAllCaps() + "? Code " + rows); @@ -143,13 +142,12 @@ public static int addOrUpdateGene(CanonicalGene gene) throws DaoException { //add gene, referring to this genetic entity con = JdbcUtil.getDbConnection(DaoGene.class); pstmt = con.prepareStatement - ("INSERT INTO gene (`GENETIC_ENTITY_ID`, `ENTREZ_GENE_ID`,`HUGO_GENE_SYMBOL`,`TYPE`,`CYTOBAND`) " - + "VALUES (?,?,?,?,?)"); + ("INSERT INTO gene (`GENETIC_ENTITY_ID`, `ENTREZ_GENE_ID`,`HUGO_GENE_SYMBOL`,`TYPE`) " + + "VALUES (?,?,?,?)"); pstmt.setInt(1, geneticEntityId); pstmt.setLong(2, gene.getEntrezGeneId()); pstmt.setString(3, gene.getHugoGeneSymbolAllCaps()); pstmt.setString(4, gene.getType()); - pstmt.setString(5, gene.getCytoband()); rows += pstmt.executeUpdate(); } else { @@ -326,7 +324,6 @@ public static ArrayList getAllGenes() throws DaoException { Set aliases = mapAliases.get(entrezGeneId); CanonicalGene gene = new CanonicalGene(geneticEntityId, entrezGeneId, rs.getString("HUGO_GENE_SYMBOL"), aliases); - gene.setCytoband(rs.getString("CYTOBAND")); gene.setType(rs.getString("TYPE")); geneList.add(gene); } @@ -374,7 +371,6 @@ private static CanonicalGene extractGene(ResultSet rs) throws SQLException, DaoE Set aliases = getAliases(entrezGeneId); CanonicalGene gene = new CanonicalGene(geneticEntityId, entrezGeneId, rs.getString("HUGO_GENE_SYMBOL"), aliases); - gene.setCytoband(rs.getString("CYTOBAND")); gene.setType(rs.getString("TYPE")); return gene; diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java 
b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java index 2ff11db79f5..20c829f542e 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java @@ -343,7 +343,8 @@ public List guessGene(String geneId, String chr) { List ret = new ArrayList(); for (CanonicalGene cg : genes) { - String gchr = getChrFromCytoband(cg.getCytoband()); + //String gchr = getChrFromCytoband(cg.getCytoband()); + String gchr = null; if (gchr==null // TODO: should we exlude this? || gchr.equals(nchr)) { ret.add(cg); diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java new file mode 100644 index 00000000000..6fa2864ac5f --- /dev/null +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java @@ -0,0 +1,299 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package org.mskcc.cbio.portal.dao; + +import java.sql.*; +import org.mskcc.cbio.portal.model.*; +import org.mskcc.cbio.portal.util.SpringUtil; + +import java.util.*; + + +/** + * Adding or updating Reference Genomes used by molecular profiling + * @author Kelsey Zhu + */ +public final class DaoReferenceGenome { + + private static final Map byGenomeBuild = new HashMap(); + private static final Map byGenomeName = new HashMap(); + private static final Map byGenomeInternalId = new HashMap(); + private static final Map genomeInternalIds = new HashMap(); + + static { + SpringUtil.initDataSource(); + reCache(); + } + + private static synchronized void clearCache() { + byGenomeBuild.clear(); + byGenomeInternalId.clear(); + byGenomeName.clear(); + genomeInternalIds.clear(); + } + + private static synchronized void addCache(ReferenceGenome referenceGenome) { + byGenomeBuild.put(referenceGenome.getBuildName(), referenceGenome); + byGenomeName.put(referenceGenome.getGenomeName(), referenceGenome); + byGenomeInternalId.put(referenceGenome.getReferenceGenomeId(), referenceGenome); + genomeInternalIds.put(referenceGenome.getBuildName(), referenceGenome.getReferenceGenomeId()); + genomeInternalIds.put(referenceGenome.getGenomeName(), referenceGenome.getReferenceGenomeId()); + } + + private static synchronized void reCache() { + clearCache(); + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + pstmt = con.prepareStatement("SELECT * FROM reference_genome"); + rs = pstmt.executeQuery(); + while (rs.next()) { + ReferenceGenome referenceGenome = extractReferenceGenome(rs); + addCache(referenceGenome); + } + } catch (SQLException e) { + e.printStackTrace(); + } finally { + JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); + } + } + + + /** + * Add a new reference genome to the Database. + * + * @param referenceGenome Reference Genome. 
+ * @throws DaoException Database Error. + */ + public static void addReferenceGenome(ReferenceGenome referenceGenome) throws DaoException { + reCache(); + addReferenceGenome(referenceGenome, false); + } + + /** + * Add a new reference genome to the Database. + * @param referenceGenome Reference Genome. + * @param overwrite if true, update the existing record when the reference genome already exists; if false, an existing record causes a DaoException. + * @throws DaoException Database Error, or the reference genome already exists and overwrite is false. + */ + public static void addReferenceGenome(ReferenceGenome referenceGenome, boolean overwrite) throws DaoException { + + ReferenceGenome existing = getReferenceGenomeByInternalId(referenceGenome.getReferenceGenomeId()); + if (existing!=null) { + if (!overwrite) { + throw new DaoException("Reference Genome " + referenceGenome.getBuildName() + "is already imported."); + } else { + updateReferenceGenome(referenceGenome); + } + } + else { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + pstmt = con.prepareStatement("INSERT INTO reference_genome " + + "( `species`, `name`, " + + "`build_name`, `genome_size`, `URL`, " + + " `release_date` ) VALUES (?,?,?,?,?,?)", + Statement.RETURN_GENERATED_KEYS); + pstmt.setString(1, referenceGenome.getSpecies()); + pstmt.setString(2, referenceGenome.getGenomeName()); + pstmt.setString(3, referenceGenome.getBuildName()); + pstmt.setLong(4, referenceGenome.getGenomeSize()); + pstmt.setString(5, referenceGenome.getUrl()); + pstmt.setDate(6, new java.sql.Date(referenceGenome.getReleaseDate().getTime())); + + pstmt.executeUpdate(); + rs = pstmt.getGeneratedKeys(); + if (rs.next()) { + int autoId = rs.getInt(1); + referenceGenome.setReferenceGenomeId(autoId); + } + // update reference cache + addCache(referenceGenome); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); + } + } + } + + /** + * Deletes all Reference Genomes. + * @throws DaoException Database Error.
+ */ + public static void deleteAllRecords() throws DaoException { + + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + JdbcUtil.disableForeignKeyCheck(con); + pstmt = con.prepareStatement("TRUNCATE TABLE reference_genome"); + pstmt.executeUpdate(); + JdbcUtil.enableForeignKeyCheck(con); + // clear cache + clearCache(); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); + } + } + + /** + * Update existing reference genome. + * @param referenceGenome Reference Genome Object + * @throws DaoException + */ + public static int updateReferenceGenome(ReferenceGenome referenceGenome) throws DaoException { + + ReferenceGenome existing = getReferenceGenomeByInternalId(referenceGenome.getReferenceGenomeId()); + if (existing==null) { + throw new DaoException("Reference Genome " + referenceGenome.getBuildName() + "does not exist."); + } + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + int rows = 0; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + pstmt = con.prepareStatement("UPDATE reference_genome " + + "SET `species`=?, `name`=?, " + + "`build_name`=?, `genome_size`=?, `URL`=?, " + + " `release_date`=? 
WHERE `reference_genome_id`=?"); + pstmt.setString(1, referenceGenome.getSpecies()); + pstmt.setString(2, referenceGenome.getGenomeName()); + pstmt.setString(3, referenceGenome.getBuildName()); + pstmt.setLong(4, referenceGenome.getGenomeSize()); + pstmt.setString(5, referenceGenome.getUrl()); + pstmt.setDate(6, new java.sql.Date(referenceGenome.getReleaseDate().getTime())); + pstmt.setInt(7, referenceGenome.getReferenceGenomeId()); + rows += pstmt.executeUpdate(); + // update reference cache + reCache(); + return rows; + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); + } + } + + /** + * Retrieve reference genome by internal DB ID + * @param internalId Reference Genome internal DB ID + * @throws DaoException Database Error. + */ + + public static ReferenceGenome getReferenceGenomeByInternalId(int internalId) throws DaoException { + ReferenceGenome genome = byGenomeInternalId.get(internalId); + return genome; + } + + /** + * Retrieve reference genome by genome build name + * @param buildName Reference Genome build name + * @throws DaoException Database Error. + */ + + public static ReferenceGenome getReferenceGenomeByBuildName(String buildName) throws DaoException { + return byGenomeBuild.get(buildName); + } + + + /** + * Retrieve reference genome by genome build name + * @param genomeName Reference Genome build name + * @throws DaoException Database Error. 
+ */ + + public static ReferenceGenome getReferenceGenomeByGenomeName(String genomeName) throws DaoException { + //return byGenomeName.get(genomeName); + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + ReferenceGenome referenceGenome = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + pstmt = con.prepareStatement("SELECT * FROM reference_genome WHERE `name` = ?"); + pstmt.setString(1,genomeName); + rs = pstmt.executeQuery(); + if (rs.next()) { + referenceGenome = extractReferenceGenome(rs); + } + } catch (SQLException e) { + e.printStackTrace(); + } finally { + JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); + return referenceGenome; + } + } + + /** + * Retrieve reference genome of interest by genome name or genome assembly name + * @param name Name of Reference Genome or Genome Assembly + * @throws DaoException Database Error. + */ + @Deprecated + public static int getReferenceGenomeIdByName(String name) throws DaoException { + return getReferenceGenomeIdByName(name, ReferenceGenome.HOMO_SAPIENS); + } + + /** + * Retrieve reference genome of interest by genome name or genome assembly name + * @param name Name of Reference Genome or Genome Assembly + * @param species genetic species + * @throws DaoException Database Error. + */ + public static int getReferenceGenomeIdByName(String name, String species) throws DaoException { + try { + return genomeInternalIds.get(name); + } catch (java.lang.NullPointerException exp) { + if (species.equals(ReferenceGenome.HOMO_SAPIENS)) { + return genomeInternalIds.get(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD); + } else if (species.equals(ReferenceGenome.MUS_MUSCULUS)) { + return genomeInternalIds.get(ReferenceGenome.MUS_MUSCULUS_DEFAULT_GENOME_BUILD); // NCBI_BUILD field was an optional in the past + } else { + throw new DaoException("Species not supproted yet"); + } + } + } + + /** + * Extracts Reference Genome JDBC Results. 
+ * @param rs JDBC Result Set + */ + private static ReferenceGenome extractReferenceGenome(ResultSet rs) throws SQLException { + ReferenceGenome referenceGenome = new ReferenceGenome( + rs.getString("SPECIES"), + rs.getString("NAME"), + rs.getString("BUILD_NAME")); + referenceGenome.setReferenceGenomeId(rs.getInt("REFERENCE_GENOME_ID")); + referenceGenome.setGenomeSize(rs.getLong("GENOME_SIZE")); + referenceGenome.setReleaseDate(rs.getDate("RELEASE_DATE")); + referenceGenome.setUrl(rs.getString("URL")); + return referenceGenome; + } + +} \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java new file mode 100644 index 00000000000..36261a6bda5 --- /dev/null +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java @@ -0,0 +1,255 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package org.mskcc.cbio.portal.dao; + +import org.mskcc.cbio.portal.model.ReferenceGenomeGene; +import org.mskcc.cbio.portal.util.ProgressMonitor; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; + +/** + * Data Access Object to Reference Genome Gene Table + * Make a Singleton Class accessed by all objects throughout the system + * @author Kelsey Zhu + */ +public class DaoReferenceGenomeGene { + private static DaoReferenceGenomeGene instance = null; + + + protected DaoReferenceGenomeGene() { + //Exists only for default instantiation + } + + public static DaoReferenceGenomeGene getInstance() { + if (instance == null) { + instance = new DaoReferenceGenomeGene(); + } + return instance; + } + + /** + * Update Reference Genome Gene Record in the Database. + * @param gene Reference Genome Gene + */ + public int updateGene(ReferenceGenomeGene gene) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + boolean setBulkLoadAtEnd = false; + try { + MySQLbulkLoader.bulkLoadOff(); + int rows = 0; + con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); + pstmt = con.prepareStatement + ("UPDATE reference_genome_gene SET `CHR`=?, `CYTOBAND`=?,`EXONIC_LENGTH`=?,`START`=?, `END`=? WHERE `ENTREZ_GENE_ID`=? AND `REFERENCE_GENOME_ID`=?"); + pstmt.setString(1, gene.getChr()); + pstmt.setString(2, gene.getCytoband()); + pstmt.setInt(3, gene.getExonicLength()); + pstmt.setLong(4, gene.getStart()); + pstmt.setLong(5, gene.getEnd()); + pstmt.setLong(6, gene.getEntrezGeneId()); + pstmt.setInt(7, gene.getReferenceGenomeId()); + + rows += pstmt.executeUpdate(); + if (rows != 1) { + ProgressMonitor.logWarning("No change for " + gene.getEntrezGeneId() + " " + gene.getReferenceGenomeId() + "? 
Code " + rows); + } + + return rows; + } catch (SQLException e) { + throw new DaoException(e); + } finally { + MySQLbulkLoader.bulkLoadOn(); + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + + } + + /** + * + * Adds a new reference genome gene Record to the Database or update the existing record. + * + * @param gene Reference Genome Gene Object. + * @return number of records successfully added. + * @throws DaoException Database Error. + */ + public int addOrUpdateGene(ReferenceGenomeGene gene) throws DaoException { + + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + int rows = 0; + ReferenceGenomeGene existingGene = null; + + existingGene = getGene(gene.getEntrezGeneId(), gene.getReferenceGenomeId()); + + if (existingGene == null) { + //add gene, referring to this genetic entity + con = JdbcUtil.getDbConnection(DaoGene.class); + pstmt = con.prepareStatement + ("INSERT INTO `reference_genome_gene` (`ENTREZ_GENE_ID`, `REFERENCE_GENOME_ID`,`CHR`,`CYTOBAND`,`EXONIC_LENGTH`,`START`,`END`) " + + "VALUES (?,?,?,?,?,?,?)"); + pstmt.setLong(1, gene.getEntrezGeneId()); + pstmt.setInt(2, gene.getReferenceGenomeId()); + pstmt.setString(3, gene.getChr()); + pstmt.setString(4, gene.getCytoband()); + pstmt.setInt(5, gene.getExonicLength()); + pstmt.setLong(6, gene.getStart()); + pstmt.setLong(7, gene.getEnd()); + rows += pstmt.executeUpdate(); + } + else { + rows += updateGene(existingGene); + } + return rows; + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + } + + + /** + * Gets the Gene with the Specified Entrez Gene ID. + * For faster access, consider using DaoGeneOptimized. + * + * @param entrezGeneId ENTRZ GENE ID. + * @return Canonical Gene Object. + * @throws DaoException Database Error. 
+ */ + public ReferenceGenomeGene getGene(long entrezGeneId, int referenceGenomeId) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); + pstmt = con.prepareStatement + ("SELECT * FROM `reference_genome_gene` WHERE `ENTREZ_GENE_ID` = ? AND `REFERENCE_GENOME_ID` = ?"); + pstmt.setLong(1, entrezGeneId); + pstmt.setInt(2, referenceGenomeId); + rs = pstmt.executeQuery(); + if (rs.next()) { + return extractGene(rs); + } else { + return null; + } + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + } + + /** + * Gets the Gene with the Specified Entrez Gene ID. + * For faster access, consider using DaoGeneOptimized. + * + * @param hugoGeneSymbol Hugo Gene Symbol. + * @return Canonical Gene Object. + * @throws DaoException Database Error. + */ + public ReferenceGenomeGene getGene(String hugoGeneSymbol, int referenceGenomeId) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); + pstmt = con.prepareStatement + ("SELECT * FROM `reference_genome_gene` JOIN `gene` ON `reference_genome_gene`.entrez_gene_id=" + + "`gene`.entrez_gene_id WHERE `HUGO_GENE_SYMBOL` = ? 
AND `REFERENCE_GENOME_ID` = ?"); + pstmt.setString(1, hugoGeneSymbol); + pstmt.setInt(2, referenceGenomeId); + rs = pstmt.executeQuery(); + if (rs.next()) { + return extractGene(rs); + } else { + return null; + } + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + } + + private ReferenceGenomeGene extractGene(ResultSet rs) throws SQLException, DaoException { + int entrezGeneId = rs.getInt("ENTREZ_GENE_ID"); + int reference_genome_id = rs.getInt("REFERENCE_GENOME_ID"); + String cytoband = rs.getString("CYTOBAND"); + ReferenceGenomeGene gene = new ReferenceGenomeGene(entrezGeneId, reference_genome_id); + gene.setChr(rs.getString("CHR")); + gene.setCytoband(rs.getString("CYTOBAND")); + gene.setExonicLength(rs.getInt("EXONIC_LENGTH")); + gene.setStart(rs.getLong("START")); + gene.setEnd(rs.getLong("END")); + return gene; + } + + /** + * Deletes the Reference Genome Gene Record with Entrez Gene ID and Referece Genome ID in the Database. + * + * @param entrezGeneId ENTREZ GENE ID + * @param referenceGenomeId REFERENCE GENOME ID + */ + public void deleteGene(int entrezGeneId, int referenceGenomeId) throws DaoException { + + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); + pstmt = con.prepareStatement("DELETE FROM `reference_genome_gene` WHERE ENTREZ_GENE_ID=? AND REFERENCE_GENOME_ID=?"); + pstmt.setLong(1, entrezGeneId); + pstmt.setInt(2, referenceGenomeId); + pstmt.executeUpdate(); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + } + + /** + * Deletes all Reference Genome Gene Records in the Database. + * @throws DaoException Database Error. + * + * @deprecated only used by deprecated code, so deprecating this as well. 
+ */ + public void deleteAllRecords() throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); + JdbcUtil.disableForeignKeyCheck(con); + pstmt = con.prepareStatement("TRUNCATE TABLE `reference_gnome_gene`"); + pstmt.executeUpdate(); + JdbcUtil.enableForeignKeyCheck(con); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); + } + } + +} diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java index 817610ef258..67845e35ec8 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java @@ -56,7 +56,7 @@ public class JdbcUtil { */ public static DataSource getDataSource() { if (dataSource == null) { - dataSource = new JdbcDataSource(); + dataSource = initDataSource(); } return dataSource; } @@ -69,6 +69,28 @@ public static void setDataSource(DataSource value) { dataSource = value; } + private static DataSource initDataSource() { + DatabaseProperties dbProperties = DatabaseProperties.getInstance(); + String host = dbProperties.getDbHost(); + String userName = dbProperties.getDbUser(); + String password = dbProperties.getDbPassword(); + String database = dbProperties.getDbName(); + String url ="jdbc:mysql://" + host + "/" + database + + "?user=" + userName + "&password=" + password + + "&zeroDateTimeBehavior=convertToNull"; + // Set up poolable data source + BasicDataSource dataSource = new BasicDataSource(); + dataSource.setDriverClassName("com.mysql.jdbc.Driver"); + dataSource.setUsername(userName); + dataSource.setPassword(password); + dataSource.setUrl(url); + // By pooling/reusing PreparedStatements, we get a major performance gain + dataSource.setPoolPreparedStatements(true); + dataSource.setMaxTotal(100); + activeConnectionCount = 
new HashMap(); + return dataSource; + } + /** * Gets Connection to the Database. * diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/CancerStudy.java b/core/src/main/java/org/mskcc/cbio/portal/model/CancerStudy.java index bbb7ada54a7..ae98f239cb9 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/model/CancerStudy.java +++ b/core/src/main/java/org/mskcc/cbio/portal/model/CancerStudy.java @@ -66,6 +66,7 @@ public class CancerStudy { private Set groups; private String shortName; private Date importDate; + private String referenceGenome; /** @@ -191,6 +192,12 @@ public String getCitation() { public void setCitation(String citation) { this.citation = citation; } + + public String getReferenceGenome() { return referenceGenome; } + + public void setReferenceGenome(String referenceGenome) { + this.referenceGenome = referenceGenome; + } /** * Gets the genetic profiles. diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/CanonicalGene.java b/core/src/main/java/org/mskcc/cbio/portal/model/CanonicalGene.java index ca908b28528..5f5f841abe1 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/model/CanonicalGene.java +++ b/core/src/main/java/org/mskcc/cbio/portal/model/CanonicalGene.java @@ -52,7 +52,6 @@ public class CanonicalGene extends Gene { private String hugoGeneSymbol; private Set aliases; private double somaticMutationFrequency; - private String cytoband; private String type; /** @@ -120,14 +119,6 @@ public void setType(String type) { this.type = type; } - public String getCytoband() { - return cytoband; - } - - public void setCytoband(String cytoband) { - this.cytoband = cytoband; - } - public Set getAliases() { if (aliases==null) { return Collections.emptySet(); diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/CopyNumberSegmentFile.java b/core/src/main/java/org/mskcc/cbio/portal/model/CopyNumberSegmentFile.java index 1cb8b8f906a..a0aa80c3a9c 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/model/CopyNumberSegmentFile.java +++ 
b/core/src/main/java/org/mskcc/cbio/portal/model/CopyNumberSegmentFile.java @@ -38,6 +38,7 @@ public static enum ReferenceGenomeId { hg18("hg18"), hg19("hg19"), + hg38("hg38"), mm10("mm10"); private String propertyName; diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java new file mode 100644 index 00000000000..6b9ecf0c0cb --- /dev/null +++ b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java @@ -0,0 +1,178 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package org.mskcc.cbio.portal.model; + + import java.math.BigInteger; + import java.util.Date; + import org.mskcc.cbio.portal.util.*; + +/** + * This represents the reference genome used by molecular profiling + * + * @author Kelsey Zhu + */ +public class ReferenceGenome { + + private int referenceGenomeId; // assigned by DB, auto increment sequence number + private String genomeName; + private String species; + private String buildName; //genome assembly name + private long genomeSize; //non-N bases + private String url; + private Date releaseDate; + public static String HOMO_SAPIENS = "human"; + public static String MUS_MUSCULUS = "mouse"; + public static String HOMO_SAPIENS_DEFAULT_GENOME_BUILD = "GRCh37"; + public static String HOMO_SAPIENS_DEFAULT_GENOME_NAME = "hg19"; + public static String MUS_MUSCULUS_DEFAULT_GENOME_BUILD = "GRCm38"; + public static String HOMO_SAPIENS_DEFAULT_GENOME_BUILD_PREFIX = "GRCh"; + public static String MUS_MUSCULUS_DEFAULT_GENOME_BUILD_PREFIX = "GRCm"; + + /** + * Constructor. + * @param genomeName Name of the reference genome. + * @param species Species of the reference genome. + * @param buildName Name of genome assembly + */ + public ReferenceGenome(String genomeName, String species, String buildName) { + super(); + this.genomeName = genomeName; + this.species = species; + this.buildName = buildName; + } + + /** + * Constructor. + * @param genomeName Name of the reference genome. + * @param species Species of the reference genome. 
+ * @param buildName Name of genome assembly + * @param genomeSize Effective genome size + * @param url URL to download reference genome + * @param releaseDate Date genome assembly released + */ + public ReferenceGenome(String genomeName, String species, String buildName, + Long genomeSize, String url, Date releaseDate) { + super(); + this.genomeName = genomeName; + this.species = species; + this.buildName = buildName; + this.genomeSize = genomeSize; + this.url = url; + this.releaseDate = releaseDate; + } + + public void setReferenceGenomeId(int referenceGenomeId) { + this.referenceGenomeId = referenceGenomeId; + } + + public int getReferenceGenomeId() { + return referenceGenomeId; + } + + public void setGenomeName(String genomeName) { + this.genomeName = genomeName; + } + + public String getGenomeName() { + return this.genomeName; + } + + public void setSpecies(String species) { + this.species = species; + } + + public String getSpecies() { + return this.species; + } + + public void setBuildName(String buildName) { + this.buildName = buildName; + } + + public String getBuildName () { + return this.buildName; + } + + public void setGenomeSize(long genomeSize) { + this.genomeSize = genomeSize; + } + + public long getGenomeSize() { + return this.genomeSize; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getUrl() { + return this.url; + } + + public void setReleaseDate(Date releaseDate) { + this.releaseDate = releaseDate; + } + + public Date getReleaseDate() { + return this.releaseDate; + } + + /** + * Equals. + * @param otherReferenceGenome Other Reference Genome. + * @return true of false. 
+ */ + @Override + public boolean equals(Object otherReferenceGenome) { + if (this == otherReferenceGenome) { + return true; + } + + if (!(otherReferenceGenome instanceof ReferenceGenome)) { + return false; + } + + ReferenceGenome that = (ReferenceGenome) otherReferenceGenome; + return + EqualsUtil.areEqual(this.genomeName, that.genomeName) && + EqualsUtil.areEqual(this.species, + that.species) && + EqualsUtil.areEqual(this.buildName, that.buildName); + } + + @Override + public int hashCode() { + int result = 3; + result = 31 * result + this.referenceGenomeId; + result = 31 * result + (this.genomeName != null ? this.genomeName.hashCode() : 0); + result = 31 * result + (this.buildName != null ? this.buildName.hashCode() : 0); + result = 31 * result + (this.species != null ? this.species.hashCode() : 0); + return result; + } + + /** + * toString() Override. + * @return string summary of reference genome + */ + @Override + public String toString() { + return "Reference Genome [referenceGenomeID=" + referenceGenomeId + ", genomeName=" + genomeName + ", species=" + + species + ", buildName=" + buildName + "]"; + } + +} \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java new file mode 100644 index 00000000000..5370f0429c8 --- /dev/null +++ b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java @@ -0,0 +1,129 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package org.mskcc.cbio.portal.model; + + +/** + * Class to wrap Reference Genome Gene. + * @author Kelsey Zhu + */ +public class ReferenceGenomeGene { + private int referenceGenomeId; + private long entrezGeneId; + private String chr; + private String cytoband; + private int exonicLength; + private long start; + private long end; + + /** + * + * @param entrezGeneId ENTREZ_GENE_ID + * @param referenceGenomeId REFERENCE_GENOME_ID + */ + public ReferenceGenomeGene(long entrezGeneId, int referenceGenomeId) { + this.entrezGeneId = entrezGeneId; + this.referenceGenomeId = referenceGenomeId; + } + + /** + * + * @param referenceGenomeId REFERENCE_GENOME_ID + * @param chr Chromosome name + * @param cytoband CYTOBAND of the gene + * @param exonicLength EXONIC LENGTH of the gene + * @param start start point of the gene + * @param end end point of the gene + */ + public ReferenceGenomeGene(long entrezGeneId, int referenceGenomeId, String chr, + String cytoband, int exonicLength, long start, long end) { + + this.referenceGenomeId = referenceGenomeId; + this.entrezGeneId = entrezGeneId; + this.chr = chr; + this.cytoband = cytoband; + this.exonicLength = exonicLength; + this.start = start; + this.end = end; + } + + + public void setReferenceGenomeId(int referenceGenomeId) { this.referenceGenomeId = referenceGenomeId; } + + public int getReferenceGenomeId() { + return referenceGenomeId; + } + + public void setEntrezGeneId(long entrezGeneId) { this.entrezGeneId = entrezGeneId; } + + public long getEntrezGeneId() { return entrezGeneId; } + + public String getChr() { + return chr; + } + + public void setChr(String chr) { + this.chr = chr; + } + public String getCytoband() { + return cytoband; + } + + public void setCytoband(String cytoband) { + this.cytoband = cytoband; + } + + public int 
getExonicLength() { + return exonicLength; + } + + public void setExonicLength(int exonicLength) { + this.exonicLength = exonicLength; + } + + public long getStart() { return this.start; } + + public void setStart(long start) { this.start = start; } + + public long getEnd() { return this.end = end; } + + public void setEnd(long end) { this.end = end; } + + @Override + public boolean equals(Object obj0) { + if (!(obj0 instanceof ReferenceGenomeGene)) { + return false; + } + + ReferenceGenomeGene gene0 = (ReferenceGenomeGene) obj0; + if (gene0.entrezGeneId == entrezGeneId && gene0.referenceGenomeId == referenceGenomeId) { + return true; + } + return false; + } + + + @Override + public int hashCode() { + int result = 2; + result = 31 * result + (int)this.entrezGeneId; + result = 31 * result + this.referenceGenomeId; + return result; + } + +} \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java index d7b3859dce5..1c876a75530 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java @@ -153,7 +153,16 @@ private static boolean segmentDataExistsForCancerStudy(CancerStudy cancerStudy) private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException { CopyNumberSegmentFile copyNumSegFile = new CopyNumberSegmentFile(); copyNumSegFile.cancerStudyId = cancerStudy.getInternalId(); - copyNumSegFile.referenceGenomeId = getRefGenId(properties.getProperty("reference_genome_id").trim()); + String referenceGenomeId = properties.getProperty("reference_genome_id").trim(); + String referenceGenome = cancerStudy.getReferenceGenome(); + if (referenceGenome == null) { + referenceGenome = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + } + if 
(!referenceGenomeId.equalsIgnoreCase(referenceGenome)) { + ProgressMonitor.setCurrentMessage(" Genome Build Name does not match, expecting " + + cancerStudy.getReferenceGenome()); + } + copyNumSegFile.referenceGenomeId = getRefGenId(referenceGenomeId); copyNumSegFile.description = properties.getProperty("description").trim(); copyNumSegFile.filename = properties.getProperty("data_filename").trim(); DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile); diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java index 59acd1b9966..eb8eea32c56 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java @@ -146,7 +146,18 @@ public void importData() throws IOException, DaoException { { String[] parts = line.split("\t", -1 ); // the -1 keeps trailing empty strings; see JavaDoc for String MafRecord record = mafUtil.parseRecord(line); + CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(geneticProfile.getCancerStudyId()); + String genomeBuildName; + try { + String referenceGenome = cancerStudy.getReferenceGenome(); + genomeBuildName = DaoReferenceGenome.getReferenceGenomeByGenomeName(referenceGenome).getBuildName(); + } catch (NullPointerException e) { + genomeBuildName = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD; + } + if (!record.getNcbiBuild().equalsIgnoreCase(genomeBuildName)) { + ProgressMonitor.setCurrentMessage("Genome Build Name does not match, expecting " + genomeBuildName); + } // process case id String barCode = record.getTumorSampleID(); Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(geneticProfile.getCancerStudyId(), diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java index 
8d3636caa39..9f2ce530b8a 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java @@ -34,6 +34,8 @@ import org.mskcc.cbio.portal.dao.*; import org.mskcc.cbio.portal.model.CanonicalGene; +import org.mskcc.cbio.portal.model.ReferenceGenome; +import org.mskcc.cbio.portal.model.ReferenceGenomeGene; import org.mskcc.cbio.portal.util.*; import joptsimple.OptionException; @@ -131,31 +133,6 @@ public static void importData(File geneFile) throws IOException, DaoException { } if (gene!=null) { - if (!cytoband.equals("-")) { - if (species.equals("mouse")) { - //Usually three cytobands are represented in the gene info for mouse: - //First, only the chromosome number, then the chromosome number and the position - //of the gene in cM, and finally, by the "Correct" cytoband, that is the name of - //the chromosome and the cytoband, which is a letter (from A to H) followed by a - //numeric position, for example X A3 or 3 A1.2. - List cytobands = new ArrayList(); - cytobands.addAll(Arrays.asList(cytoband.split("\\|"))); - for (String i : cytobands) { - if (!i.contains("cM")) { //Skip cytobands containing cM - if (cytobands.size() <=2) { - cytoband = i; //Only one cytoband is left from these gene. - } else { - //We have more than one cytoband for these gene, so keep the one which - //has the cytoband. 
- if (i.contains("A") || i.contains("B") || i.contains("C") || i.contains("D") || i.contains("E") || i.contains("F") || i.contains("G") || i.contains("H")) { - cytoband = i; - } - } - } - } - } - gene.setCytoband(cytoband); //For human there is no need to parse the cytoband - } gene.setType(type); } } @@ -256,32 +233,266 @@ private static void logDuplicateGeneSymbolWarning(String symbol, Set cytobands = new ArrayList(); + cytobands.addAll(Arrays.asList(cytoband.split("\\|"))); + for (String i : cytobands) { + if (!i.contains("cM")) { //Skip cytobands containing cM + if (cytobands.size() <= 2) { + cytoband = i; //Only one cytoband is left from these gene. + } else { + //We have more than one cytoband for these gene, so keep the one which + //has the cytoband. + if (i.contains("A") || i.contains("B") || i.contains("C") || i.contains("D") || i.contains("E") || i.contains("F") || i.contains("G") || i.contains("H")) { + cytoband = i; + } + } + } + } + } + return cytoband; + } + return null; + } + + /** + * This method imports the gene lengths of a General Transfer Format (gtf) file. This file contains different genetic features from genes (CDS, exons, introns...) in each line. + * All features of a single gene contain the same Ensembl ID. Therefore, this method uses the Ensembl IDs to differentiate between different genes. All the features with the same + * Ensembl ID are in consecutive lines. This method uses the gene symbol to retrieve the Entrez ID, but different Ensembl IDs can share the same symbol. If these Ensembl IDs are + * located in different chromosomes, the method uses the length of the Ensembl ID according to the cytoband from the gene saved in the database. In the case multiple Ensembl IDs + * with the same symbol are on the same chromosome or no cytoband information is available, the length of the last Ensembl ID is taken. 
+ * + * @param geneFile + * @throws IOException + * @throws DaoException + */ + + public static void importGeneLength(File geneFile, String genomeBuild, String species, boolean hasGenes) throws IOException, DaoException { + //Set the variables needed for the method + FileReader reader = new FileReader(geneFile); + BufferedReader buf = new BufferedReader(reader); + int referenceGenomeId = DaoReferenceGenome.getReferenceGenomeIdByName(genomeBuild, species); + String line; + ProgressMonitor.setCurrentMessage("\nUpdating gene lengths... \n"); //Display a message in the console + boolean geneUpdated = false; + + String previousEnsembl = ""; + String currentEnsembl = ""; + String previousSymbol = ""; + String currentSymbol = ""; + String previousChrom = ""; + String currentChrom = ""; + Long currentStart; + Long currentStop; + String cytoband = ""; + + String parts[] = null; + List loci = new ArrayList(); + int nrGenesUpdated = 0; + + //Iterate over the file and fill the hash map with the max and min values of each gene (start and end position) + while ((line=buf.readLine()) != null) { + if(line.charAt(0) == '#'){ + continue; + } + parts = line.split("\t"); + currentChrom = parts[0]; + cytoband = getCytoband(parts[7], species); + currentStart = Long.parseLong(parts[3]); + currentStop = Long.parseLong(parts[4]) + 1; // We have to add 1 here, because the last base is also included. 
+ + if (parts[2].contains("exon") || parts[2].contains("CDS")) { + String info[] = parts[8].split(";"); + + //Retrieve the ensembl ID + for (String i : info) { + if (i.contains("gene_id")) { + String j[] = i.split(" "); + currentEnsembl = j[1].replaceAll("\"", ""); + } + else if (i.contains("gene_name")) { + String j[] = i.split(" "); + currentSymbol = j[2].replaceAll("\"", ""); + } + } + + /// Only in case of the first line + if (previousEnsembl.equals("")) { + previousEnsembl = currentEnsembl; + previousSymbol = currentSymbol; + previousChrom = currentChrom; + loci.add(new long[]{currentStart, currentStop}); //Add the new positions + } + /// For all other lines + else { + + /// If there is no switch from Ensembl ID + if (previousEnsembl.equals(currentEnsembl)) { + + loci.add(new long[]{currentStart, currentStop}); //Add the new positions + } + /// If there is a switch + else { + geneUpdated = updateLength(previousSymbol, previousChrom, loci, + referenceGenomeId, hasGenes, cytoband); + if (geneUpdated) { + nrGenesUpdated++; + } + /// At the end of writing a new gene, clear the loci and save the new ensemblID. + loci.clear(); + + previousEnsembl = currentEnsembl; + previousSymbol = currentSymbol; + previousChrom = currentChrom; + loci.add(new long[]{currentStart, currentStop}); //Add the new positions + } + } + } + } + + /// Write the last gene + /// First check if the gene exists in the database + geneUpdated = updateLength(previousSymbol, previousChrom, loci, referenceGenomeId, hasGenes, cytoband); + if (geneUpdated) { + nrGenesUpdated++; + } + + ProgressMonitor.setCurrentMessage("Updated length info for " + nrGenesUpdated + " genes\n"); + + buf.close(); + } + + /** + * This method receives a symbol, a chromosome and a list of loci (should be from the same gene), and with that it retrieves the database gene and it calculates the length + * of all its exons contained in loci. 
If the symbol is non-ambiguous, or the chromosome reported does not match the cytoband of the database gene, then length is not updated. + * The method reports a boolean stating if the gene length has been updated or not. + * + * @param symbol + * @param chromosome + * @param loci + * @return + * @throws IOException + * @throws DaoException + */ + public static boolean updateLength(String symbol, String chromosome, List loci, int refreneceGenomeId, + boolean hasGenes, String cytoband) throws IOException, DaoException { + DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); + boolean lengthUpdated = false; + /// Check if the gene is in the database + CanonicalGene gene = daoGeneOptimized.getNonAmbiguousGene(symbol, chromosome, false); //Identify unambiguously the gene (with the symbol and the chromosome) + DaoReferenceGenomeGene daoReferenceGenomeGene = DaoReferenceGenomeGene.getInstance(); + ReferenceGenomeGene refGene = DaoReferenceGenomeGene.getInstance().getGene(gene.getEntrezGeneId(), refreneceGenomeId); + /// If it's not in the database, don't add it + if (!(gene==null)) { + /// Calc length + long[] exonic = calculateGeneLength(loci); + + /// If there is no cytoband in the database, just write it (can also be an overwrite) + if (cytoband == null) { + if (hasGenes) { + daoGeneOptimized.updateGene(gene); + lengthUpdated = true; + } + } + + /// If there is a cytoband in database, check if cytoband-chr matches input-chr + else { + String cbChr = "chr"+cytoband.split("p|q")[0]; + if (cbChr.equals(chromosome)) { //Update the length only if the chromosome matches + if (hasGenes) { + //gene.setLength((int) exonic[2]); + daoGeneOptimized.updateGene(gene); + } + + // update reference genome gene + if (refGene == null) { + refGene = new ReferenceGenomeGene(gene.getGeneticEntityId(), refreneceGenomeId); + } + refGene.setEntrezGeneId(gene.getEntrezGeneId()); + refGene.setChr(chromosome.replace("chr", "")); + refGene.setCytoband(cytoband); + 
refGene.setExonicLength((int) exonic[2]); + refGene.setStart(exonic[0]); + refGene.setEnd(exonic[1]); + daoReferenceGenomeGene.addOrUpdateGene(refGene); + lengthUpdated = true; + } + else { + ProgressMonitor.logWarning("Cytoband does not match, gene not saved (likely another version of gene in gtf has correct chr and is saved)"); + } + } + } + return lengthUpdated; + } - static void importSuppGeneData(File suppGeneFile) throws IOException, DaoException { + /** + * This method uses a list of exon loci from the same gene and it adds the length of all of them to get the gene length. If some of the exons are + * overlapping, the overlapping part is only counted once in the calculation. For example, if an exon goes from position 3 to 10 and another one from + * position 5 to 11, when calculating the length these exons would be considered as a single exon going from position 3 to 11. + * + * @param loci + * @return + */ + public static long[] calculateGeneLength(List loci) { + long min = Long.MAX_VALUE, max=-1; + for (long[] l : loci) { + if (l[0]max) { + max = l[1]; + } + } + if (max < min) { + throw new IllegalArgumentException("Found error: max=" + max + ", min=" + min); + } + BitSet bitSet = new BitSet((int)(max-min)); + for (long[] l : loci) { + bitSet.set((int)(l[0]-min), ((int)(l[1]-min))); + } + + return new long[]{min, max, bitSet.cardinality()}; + } + + static void importSuppGeneData(File suppGeneFile, String referenceGenomeBuild) throws IOException, DaoException { MySQLbulkLoader.bulkLoadOff(); FileReader reader = new FileReader(suppGeneFile); BufferedReader buf = new BufferedReader(reader); String line; DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); + DaoReferenceGenomeGene daoRefGene = DaoReferenceGenomeGene.getInstance(); + ReferenceGenome refGenome = DaoReferenceGenome.getReferenceGenomeByBuildName(referenceGenomeBuild); while ((line = buf.readLine()) != null) { ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); if 
(!line.startsWith("#")) { String parts[] = line.split("\t"); CanonicalGene gene = new CanonicalGene(parts[0]); + ReferenceGenomeGene refGene = new ReferenceGenomeGene( + gene.getEntrezGeneId(), + refGenome.getReferenceGenomeId()); if (!parts[1].isEmpty()) { gene.setType(parts[1]); } if (!parts[2].isEmpty()) { - gene.setCytoband(parts[2]); + refGene.setCytoband(parts[2]); } daoGene.addGene(gene); + daoRefGene.addOrUpdateGene(refGene); } } reader.close(); } - @Override + @Override public void run() { try { SpringUtil.initDataSource(); @@ -294,7 +505,9 @@ public void run() { parser.accepts( "genes", "ncbi genes file" ).withRequiredArg().describedAs( "ncbi_genes.txt" ).ofType( String.class ); parser.accepts( "supp-genes", "alternative genes file" ).withRequiredArg().describedAs( "supp-genes.txt" ).ofType( String.class ); parser.accepts( "microrna", "microrna file" ).withRequiredArg().describedAs( "microrna.txt" ).ofType( String.class ); - + parser.accepts( "gtf", "gtf file for calculating and storing gene lengths" ).withRequiredArg().describedAs( "gencode..annotation.gtf" ).ofType( String.class ); + parser.accepts( "genome-build", "genome build eg GRCh38" ).withRequiredArg().describedAs( "genome build" ).ofType( String.class ); + parser.accepts( "species", "different kinds of organisms eg. 
humna").withRequiredArg().describedAs( "species" ).ofType( String.class ); String progName = "importGenes"; OptionSet options = null; try { @@ -330,7 +543,8 @@ public void run() { numLines = FileUtil.getNumLines(suppGeneFile); System.out.println(" --> total number of lines: " + numLines); ProgressMonitor.setMaxValue(numLines); - ImportGeneData.importSuppGeneData(suppGeneFile); + ImportGeneData.importSuppGeneData(suppGeneFile, + (String)options.valueOf("genome-build")); } if(options.has("microrna")) { @@ -341,6 +555,20 @@ public void run() { ProgressMonitor.setMaxValue(numLines); ImportMicroRNAIDs.importData(miRNAFile); } + + if(options.has("gtf")) { + File lociFile = new File((String) options.valueOf("gtf")); + String species = ReferenceGenome.HOMO_SAPIENS; + if (options.has("species")) { + species = (String)options.valueOf("species"); + } + System.out.println("Reading loci data from: " + lociFile.getAbsolutePath()); + numLines = FileUtil.getNumLines(lociFile); + System.out.println(" --> total number of lines: " + numLines); + ProgressMonitor.setMaxValue(numLines); + ImportGeneData.importGeneLength(lociFile, (String)options.valueOf("genome-build"), + species, options.has("genes")); + } MySQLbulkLoader.flushAll(); System.err.println("Done. Restart tomcat to make sure the cache is replaced with the new data."); diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java new file mode 100644 index 00000000000..417c3b22e6b --- /dev/null +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java @@ -0,0 +1,184 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package org.mskcc.cbio.portal.scripts; + +import org.apache.commons.lang3.StringUtils; +import org.mskcc.cbio.portal.dao.*; +import org.mskcc.cbio.portal.model.ReferenceGenome; +import org.mskcc.cbio.portal.util.*; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; +import joptsimple.OptionSpec; + +import java.io.*; +import java.util.*; +import java.text.SimpleDateFormat; +import java.text.ParseException; + +/** + * Command Line Tool to Import Reference Genome Used by Molecular Profiling. + */ +public class ImportReferenceGenome extends ConsoleRunnable { + + /** + * Adds the genes parsed from the file into the Database. 
+ * + * @param referenceGenomeFile File with reference genome information + * @throws IOException + * @throws DaoException + */ + public static void importData(File referenceGenomeFile) throws IOException, DaoException, ParseException { + + try (FileReader reader = new FileReader(referenceGenomeFile)) { + BufferedReader buf = new BufferedReader(reader); + String line; + Set referenceGenomes = new HashSet(); + while ((line = buf.readLine()) != null) { + ProgressMonitor.incrementCurValue(); + ConsoleUtil.showProgress(); + if (line.startsWith("#")) { + continue; + } + String parts[] = line.split("\t"); + String species = parts[0]; + String name = parts[1]; + String buildName = parts[2]; + String genomeSize = parts[3]; + String url = parts[4]; + String releaseDate = parts[5]; + + ReferenceGenome referenceGenome = new ReferenceGenome(name, species, buildName); + if (StringUtils.isNotEmpty(url)) { + referenceGenome.setUrl(url); + } + + if (StringUtils.isNotEmpty(genomeSize)) { + referenceGenome.setGenomeSize(Long.parseLong(genomeSize)); + } + + if (StringUtils.isNotEmpty(releaseDate)) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-mm-dd"); + referenceGenome.setReleaseDate(sdf.parse(releaseDate)); + + } + referenceGenomes.add(referenceGenome); + } + addReferenceGenomesToDB(referenceGenomes); + } + + } + + /** + * Iterate over the genes found in the given maps and try to add them to the DB. 
+ * + * @param referenceGenomes: reference genomes + * @throws DaoException + */ + private static void addReferenceGenomesToDB(Set referenceGenomes) throws DaoException { + + + int nrExisting = 0; + for (ReferenceGenome refGenome: referenceGenomes) { + if (DaoReferenceGenome.getReferenceGenomeByInternalId(refGenome.getReferenceGenomeId()) != null) { + ProgressMonitor.logWarning("Reference genome updated"); + int rows = DaoReferenceGenome.updateReferenceGenome(refGenome); + if (rows != 1) { + ProgressMonitor.logWarning("No change for " + refGenome.getGenomeName()); + } + } else { + ProgressMonitor.logWarning("New reference genome added"); + DaoReferenceGenome.addReferenceGenome(refGenome); + } + } + } + + + @Override + public void run() { + try { + SpringUtil.initDataSource(); + + String description = "Update reference_genome table "; + + // using a real options parser, helps avoid bugs + OptionParser parser = new OptionParser(); + OptionSpec help = parser.accepts( "help", "print this help info" ); + parser.accepts( "ref-genome", "reference genome file" ).withRequiredArg().describedAs("reference_genomes.txt").ofType( String.class ); + + String progName = "importReferenceGenomes"; + OptionSet options = null; + try { + options = parser.parse( args ); + } catch (OptionException e) { + throw new UsageException(progName, description, parser, + e.getMessage()); + } + + if( options.has( help ) ){ + throw new UsageException(progName, description, parser); + } + + ProgressMonitor.setConsoleMode(true); + + File referenceGenomeFile; + int numLines; + if(options.has("ref-genome")) { + File referenceFile = new File((String) options.valueOf("ref-genome")); + + System.out.println("Reading reference genome from: " + referenceFile.getAbsolutePath()); + numLines = FileUtil.getNumLines(referenceFile); + System.out.println(" --> total number of lines: " + numLines); + ProgressMonitor.setMaxValue(numLines); + MySQLbulkLoader.bulkLoadOn(); + 
ImportReferenceGenome.importData(referenceFile); + } + + MySQLbulkLoader.flushAll(); + System.err.println("Done. Restart tomcat to make sure the cache is replaced with the new data."); + + } + catch (RuntimeException e) { + throw e; + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Makes an instance to run with the given command line arguments. + * + * @param args the command line arguments to be used + */ + public ImportReferenceGenome(String[] args) { + super(args); + } + + /** + * Runs the command as a script and exits with an appropriate exit code. + * + * @param args the arguments given on the command line + */ + public static void main(String[] args) { + ConsoleRunnable runner = new ImportReferenceGenome(args); + runner.runInConsole(); + } + +} \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java index 418ad942a0a..79229f451bd 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java @@ -825,7 +825,6 @@ private List importPhosphoGene(List genes, String ProgressMonitor.logWarning("Phosphoprotein " + phosphoSymbol + " not yet known in DB. 
Adding it to `gene` table with 3 aliases in `gene_alias` table."); phosphoGene = new CanonicalGene(phosphoSymbol, aliases); phosphoGene.setType(CanonicalGene.PHOSPHOPROTEIN_TYPE); - phosphoGene.setCytoband(gene.getCytoband()); daoGene.addGene(phosphoGene); } phosphoGenes.add(phosphoGene); diff --git a/core/src/main/java/org/mskcc/cbio/portal/servlet/CnaJSON.java b/core/src/main/java/org/mskcc/cbio/portal/servlet/CnaJSON.java index 21db27b7331..d48d53a7873 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/servlet/CnaJSON.java +++ b/core/src/main/java/org/mskcc/cbio/portal/servlet/CnaJSON.java @@ -430,7 +430,6 @@ private void exportCnaEvent(Map data, Map mapMutatio CanonicalGene gene = daoGeneOptimized.getGene(cnaEvent.getEntrezGeneId()); String symbol = gene.getHugoGeneSymbolAllCaps(); data.get("gene").add(symbol); - data.get("cytoband").add(gene.getCytoband()); data.get("entrez").add(cnaEvent.getEntrezGeneId()); data.get("alter").add(cnaEvent.getAlteration().getCode()); data.get("mrna").add(mrna); diff --git a/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java b/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java index 5b04f61518b..f33402a9b07 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java +++ b/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java @@ -186,7 +186,6 @@ protected void doPost(HttpServletRequest httpServletRequest, CanonicalGene comparedGene = daoGeneOptimized.getGene(compared_gene_id); ObjectNode _scores = mapper.createObjectNode(); _scores.put("gene", comparedGene.getHugoGeneSymbolAllCaps()); - _scores.put("cytoband", comparedGene.getCytoband()); _scores.put("pearson", pearson); _scores.put("spearman", spearman); fullResultJson.add(_scores); @@ -242,8 +241,7 @@ protected void doPost(HttpServletRequest httpServletRequest, double spearman = spearmansCorrelation.correlation(new_query_gene_exp, new_compared_gene_exp); CanonicalGene comparedGene = 
daoGeneOptimized.getGene(compared_gene_id); fullResutlStr.append( - comparedGene.getHugoGeneSymbolAllCaps() + "\t" + - comparedGene.getCytoband() + "\t" + + comparedGene.getHugoGeneSymbolAllCaps() + "\t" + "\t" + (double) Math.round(pearson * 100) / 100 + "\t" + (double) Math.round(spearman * 100) / 100 + "\n" ); diff --git a/core/src/main/java/org/mskcc/cbio/portal/servlet/MutationsJSON.java b/core/src/main/java/org/mskcc/cbio/portal/servlet/MutationsJSON.java index 6660d3e99f1..df259663f36 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/servlet/MutationsJSON.java +++ b/core/src/main/java/org/mskcc/cbio/portal/servlet/MutationsJSON.java @@ -212,9 +212,6 @@ private void processGetSmgRequest(HttpServletRequest request, String hugo = gene.getHugoGeneSymbolAllCaps(); map.put("gene_symbol", hugo); - String cytoband = gene.getCytoband(); - map.put("cytoband", cytoband); - Integer count = Integer.parseInt(entry.getValue().get("count")); map.put("num_muts", count); diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java b/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java index bb4db99129f..d48de86deff 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java +++ b/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java @@ -35,6 +35,7 @@ import org.mskcc.cbio.portal.dao.DaoCancerStudy; import org.mskcc.cbio.portal.dao.DaoException; import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.model.ReferenceGenome; import org.mskcc.cbio.portal.scripts.TrimmedProperties; import java.io.File; @@ -104,6 +105,11 @@ private static CancerStudy getCancerStudy(TrimmedProperties properties) cancerStudy.setCitation(properties.getProperty("citation")); cancerStudy.setGroupsInUpperCase(properties.getProperty("groups")); cancerStudy.setShortName(shortName); + String referenceGenome = properties.getProperty("reference_genome"); + if (referenceGenome == null) { + referenceGenome = 
ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + } + cancerStudy.setReferenceGenome(referenceGenome); return cancerStudy; } diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java b/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java index 5d5745f5dfd..418e9a22ba0 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java +++ b/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java @@ -38,10 +38,17 @@ import org.apache.commons.math3.stat.inference.TestUtils; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.node.ObjectNode; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; import org.mskcc.cbio.portal.dao.DaoException; import org.mskcc.cbio.portal.dao.DaoGeneOptimized; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.dao.DaoReferenceGenome; +import org.mskcc.cbio.portal.dao.DaoReferenceGenomeGene; import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.model.CanonicalGene; import org.mskcc.cbio.portal.model.GeneticAlterationType; +import org.mskcc.cbio.portal.model.ReferenceGenome; +import org.mskcc.cbio.portal.model.Gene; import org.mskcc.cbio.portal.stats.FisherExact; /** @@ -109,8 +116,10 @@ public ObjectNode processMutHm(long entrezGeneId, ArrayList sampleList, } //get Gene Name and Cytoband DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); - String geneName = daoGeneOptimized.getGene(entrezGeneId).getHugoGeneSymbolAllCaps(); - String cytoband = daoGeneOptimized.getGene(entrezGeneId).getCytoband(); + Gene gene = daoGeneOptimized.getGene(entrezGeneId); + String geneName = ((CanonicalGene) gene).getHugoGeneSymbolAllCaps(); + String cytoband = getCytoband(((CanonicalGene) gene).getGeneticEntityId(), geneticProfileStableId); + //statistics analysis if (!(Arrays.asList(queriedGenes)).contains(geneName)) { //remove queried genes from result 
_datum.put(COL_NAME_GENE, geneName); @@ -150,8 +159,9 @@ public ObjectNode process(long entrezGeneId, String[] values, ArrayList } //get Gene Name and Cytoband DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); - String geneName = daoGeneOptimized.getGene(entrezGeneId).getHugoGeneSymbolAllCaps(); - String cytoband = daoGeneOptimized.getGene(entrezGeneId).getCytoband(); + Gene gene = daoGeneOptimized.getGene(entrezGeneId); + String geneName = ((CanonicalGene) gene).getHugoGeneSymbolAllCaps(); + String cytoband = getCytoband(((CanonicalGene) gene).getGeneticEntityId(), geneticProfileStableId); if (cytoband == null || cytoband.length() == 0) { cytoband = "--"; } @@ -642,4 +652,20 @@ private double runFisherExactTest(HashMap singleGeneCaseValueMa FisherExact fisher = new FisherExact(a + b + c + d); return fisher.getCumlativeP(a, b, c, d); } + + private String getCytoband(int geneticEntityId, String geneticProfileStableId) { + try { + int cancerStudyId = DaoGeneticProfile.getGeneticProfileByStableId(geneticProfileStableId).getCancerStudyId(); + String genomeName = null; + try { + genomeName = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId).getReferenceGenome(); + } catch (NullPointerException ne) { + genomeName = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + } + int genomeId = DaoReferenceGenome.getReferenceGenomeByGenomeName(genomeName).getReferenceGenomeId(); + return DaoReferenceGenomeGene.getInstance().getGene(geneticEntityId, genomeId).getCytoband(); + } catch (DaoException e) { + return null; + } + } } diff --git a/core/src/main/scripts/importer/cbio_importer.py b/core/src/main/scripts/importer/cbio_importer.py new file mode 100644 index 00000000000..1d805b52400 --- /dev/null +++ b/core/src/main/scripts/importer/cbio_importer.py @@ -0,0 +1,185 @@ +import sys +import logging +import argparse +from sqlalchemy import create_engine +from sqlalchemy import exc +import smtplib +from email.mime.multipart import MIMEMultipart +from 
email.mime.text import MIMEText +import cbioportalImporter +import validateData + +def send_mail(physician, patient, sample): + me = "cbioportal@uhnresearch.ca" + physician = "kzhu@uhnresearch.ca" + + # Create message container - the correct MIME type is multipart/alternative. + msg = MIMEMultipart('alternative') + msg['Subject'] = "Link" + msg['From'] = me + msg['To'] = physician + + #http://localhost:8081/cbioportal/case.do#/patient?studyId=OCTANE&sampleId=OCT-01-0001_Tumour + # Create the body of the message (a plain-text and an HTML version). + text = "Dear Dr. %s!\nnew sample is available for your patient %s\n"\ + "Here is the link to view new sample:\n"\ + "http://localhost:8081/cbioportal/case.do#/patient?studyId=OCTANE&sampleId=%s"%(physician, patient, sample) + html = """\ + + New Sample Availabe for your Review + +

Dear Dr. %s
+ new sample is available for your patient %s
+ Here is the link + to view new sample. +

+ + + """%(physician, patient, sample) + + # Record the MIME types of both parts - text/plain and text/html. + part1 = MIMEText(text, 'plain') + part2 = MIMEText(html, 'html') + + # Attach parts into message container. + # According to RFC 2046, the last part of a multipart message, in this case + # the HTML message, is best and preferred. + msg.attach(part1) + msg.attach(part2) + + # Send the message via local SMTP server. + s = smtplib.SMTP('smtp.uhnresearch.ca') + # sendmail function takes 3 arguments: sender's address, recipient's address + # and message to send - here it is sent as one string. + s.sendmail(me, physician, msg.as_string()) + s.quit() + +def get_options(): + parser = argparse.ArgumentParser(description='cBioPortal Importer') + parser.add_argument('-u', '--url_server', + type=str, + default='http://localhost/cbioportal', + help='URL to cBioPortal server. You can ' + 'set this if your URL is not ' + 'http://localhost/cbioportal') + parser.add_argument('-html', '--html_table', type=str, required=False, + help='path to html report output file') + parser.add_argument('-s', '--study_directory', type=str, required=False, + help='path to directory.') + parser.add_argument('-r', '--relaxed_clinical_definitions', required=False, + action='store_true', default=False, + help='Option to enable relaxed mode for validator when ' + 'validating clinical data without header definitions') + parser.add_argument('-m', '--strict_maf_checks', required=False, + action='store_true', default=False, + help='Option to enable strict mode for validator when ' + 'validating mutation data') + parser.add_argument('-n', '--no_portal_checks', default=False, + action='store_true', + help='Skip tests requiring information ' + 'from the cBioPortal installation') + parser.add_argument('-P', '--portal_properties', type=str, + help='portal.properties file path (default: assumed hg19)', + required=False) + parser.add_argument('-jar', '--jar_path', type=str, required=False, + help='Path 
to scripts JAR file (default: $PORTAL_HOME/scripts/target/scripts-*.jar)') + parser.add_argument('-c', '--cancer_study', type=str, required=True, + help='Cancer study identifier') + parser.add_argument('-o', '--override_warning', action='store_true', + help='override warnings and continue importing') + parser.add_argument('-v', '--verbose', required=False, action='store_true', + help='report status info messages in addition ') + parser = parser.parse_args() + return parser + +def get_sample_info(connection): + try: + sql_str = """ + select p.stable_id as patient_id, s.STABLE_ID as sample_id, cp.ATTR_VALUE as physician + from patient p + join sample s on s.PATIENT_ID = p.INTERNAL_ID + join clinical_patient cp on cp.INTERNAL_ID = p.INTERNAL_ID + where p. CANCER_STUDY_ID = %s + and cp.ATTR_ID = 'TREATING_PHYSICIAN' + """%(cancer_study_id) + return connection.execute(sql_str) + except: + raise + +def get_study_id(cancer_study_identifier): + try: + sql_str = """ + select cancer_study_id from cancer_study where CANCER_STUDY_IDENTIFIER = '%s' + """%cancer_study_identifier + result = connection.execute(sql_str) + for row in result: + return row['cancer_study_id'] + except: + raise + +def get_db_connection(): + try: + # mysql-python + engine = create_engine('mysql+mysqldb://cbio_user:cbi0pass@localhost/cgds') + return engine.connect() + except exc.SQLAlchemyError: + raise + +def get_logger(): + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + return logging.getLogger('cbio_importer') + +def get_sample_dic(sample_list): + res_dic = {} + for row in sample_list: + patient_id = row['patient_id'] + sample_id = row['sample_id'] + if patient_id not in res_dic.keys(): + res_dic[patient_id] = [sample_id] + res_dic['physician'] = row['physician'] + else: + res_dic[patient_id].append(sample_id) + return res_dic + +if __name__ == '__main__': + # Parse user input + args = get_options() + + + connection = get_db_connection() + 
logger = get_logger() + + logger.info(args) + + cancer_study_id = get_study_id(args.cancer_study) + logger.info("Cancer Study ID: %s"%cancer_study_id) + + try: + logger.info("check exisiting samples...") + sample_list_old = get_sample_info(connection) + + old_samples = get_sample_dic(sample_list_old) + logger.info(old_samples) + + # Import study + # exit_code = validateData.main_validate(args) + # if not exit_code in [1,2]: + # cbioportalImporter.main(args) + # else: + # logger.error('Validation of study {status}.'.format( + # status={0: 'succeeded', + # 1: 'failed', + # 2: 'not performed as problems occurred', + # 3: 'succeeded with warnings'}.get(exit_code, 'unknown'))) + # system.exit(exit_code) + # get sample list again + logger.info("check new samples...") + sample_list_new = get_sample_info(connection) + new_samples = get_sample_dic(sample_list_new) + logger.info(new_samples) + # try import study + except exc.SQLAlchemyError as e: + logger.error(e.message) + + finally: + connection.close() \ No newline at end of file diff --git a/core/src/main/scripts/importer/cbioportal_common.py b/core/src/main/scripts/importer/cbioportal_common.py index 5ee61824861..ae5e01f3635 100644 --- a/core/src/main/scripts/importer/cbioportal_common.py +++ b/core/src/main/scripts/importer/cbioportal_common.py @@ -778,7 +778,10 @@ def parse_metadata_file(filename, ) if meta_file_type in (MetaFileTypes.SEG, MetaFileTypes.GISTIC_GENES): - valid_segment_reference_genomes = ['hg19'] + # Todo: Restore validation for reference genome in segment files + # Validation can be restored to normal when hg18 data on public portal and data hub has been + # liftovered to hg19. It was decided in the data hub call of August 14 2018 to remove validation until then. 
+ valid_segment_reference_genomes = ['hg19','hg38'] if meta_dictionary['reference_genome_id'] not in valid_segment_reference_genomes: logger.error( 'Reference_genome_id is not %s', diff --git a/core/src/main/scripts/importer/metaImport.py b/core/src/main/scripts/importer/metaImport.py index 3bbad84b8cf..5ead265f68a 100755 --- a/core/src/main/scripts/importer/metaImport.py +++ b/core/src/main/scripts/importer/metaImport.py @@ -71,9 +71,15 @@ def interface(): action='store_true', help='Skip tests requiring information ' 'from the cBioPortal installation') - parser.add_argument('-P', '--portal_properties', type=str, - help='portal.properties file path (default: assumed hg19)', - required=False) + parser.add_argument('-species', '--species', type=str, default='human', + help='species information (default: assumed human)', + required=False) + parser.add_argument('-genome', '--reference_genome', type=str, default='hg19', + help='reference genome build (default: assumed hg19)', + required=False) + parser.add_argument('-build', '--genome_build', type=str, default='37', + help='reference genome build (default: assumed 37 for reference genome hg19)', + required=False) parser.add_argument('-jar', '--jar_path', type=str, required=False, help=( 'Path to scripts JAR file (default: locate it ' diff --git a/core/src/main/scripts/importer/validateData.py b/core/src/main/scripts/importer/validateData.py index ce5fbdb2814..4b5d8f2cc9a 100755 --- a/core/src/main/scripts/importer/validateData.py +++ b/core/src/main/scripts/importer/validateData.py @@ -293,21 +293,34 @@ def __init__(self, cancer_type_dict, hugo_entrez_map, alias_entrez_map, gene_set self.species = 'human' self.ncbi_build = '37' self.genome_build = 'hg19' - - def load_genome_info(self, properties_filename): - """Retrieves the species and genome information from portal.properties.""" - with open(properties_filename, 'r') as properties_file: - for line in properties_file: - line = line.strip() - if line.startswith('#') or 
'=' not in line: - continue - sp_line = line.split('=', 1) - if sp_line[0] == 'species': - self.species = sp_line[1] - elif sp_line[0] == 'ncbi.build': - self.ncbi_build = sp_line[1] - elif sp_line[0] == 'ucsc.build': - self.genome_build = sp_line[1] + #Set defaults for genome version and species + self.__species = 'human' + self.__ncbi_build = '37' + self.__genome_build = 'hg19' + + @property + def species(self): + return self.__species + + @species.setter + def species(self, species): + self.__species= species + + @property + def genome_build(self): + return self.__genome_build + + @genome_build.setter + def genome_build(self, genome_build): + self.__genome_build= genome_build + + @property + def ncbi_build(self): + return self.__ncbi_build + + @ncbi_build.setter + def ncbi_build(self, ncbi_build): + self.__ncbi_build = ncbi_build class Validator(object): @@ -4314,29 +4327,29 @@ def validate_defined_caselists(cancer_study_id, case_list_ids, file_types, logge "'add_global_case_list: true' to the meta_study.txt file", cancer_study_id + '_all') - if 'meta_mutations_extended' in file_types: - if cancer_study_id + '_sequenced' not in case_list_ids: - logger.error( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for mutations. This " - "is required for calculation of samples with mutations in OncoPrint and Study Summary.", - cancer_study_id + '_sequenced') - - if 'meta_CNA' in file_types: - if cancer_study_id + '_cna' not in case_list_ids: - logger.error( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for mutations. 
This " - "is required for calculation of samples with CNA in OncoPrint and Study Summary.", - cancer_study_id + '_cna') - - if 'meta_mutations_extended' in file_types and 'meta_CNA' in file_types: - if cancer_study_id + '_cnaseq' not in case_list_ids: - logger.warning( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for this data type. On the query page, this " - "case list will be selected by default when both mutation and CNA data are available.", - cancer_study_id + '_cnaseq') + if 'meta_mutations_extended' in file_types: + if cancer_study_id + '_sequenced' not in case_list_ids: + logger.error( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for mutations. This " + "is required for calculation of samples with mutations in OncoPrint and Study Summary.", + cancer_study_id + '_sequenced') + + if 'meta_CNA' in file_types: + if cancer_study_id + '_cna' not in case_list_ids: + logger.error( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for mutations. This " + "is required for calculation of samples with CNA in OncoPrint and Study Summary.", + cancer_study_id + '_cna') + + if 'meta_mutations_extended' in file_types and 'meta_CNA' in file_types: + if cancer_study_id + '_cnaseq' not in case_list_ids: + logger.warning( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for this data type. 
On the query page, this " + "case list will be selected by default when both mutation and CNA data are available.", + cancer_study_id + '_cnaseq') def validateStudyTags(tags_file_path, logger): """Validate the study tags file.""" @@ -4614,9 +4627,15 @@ def interface(args=None): action='store_true', help='Skip tests requiring information ' 'from the cBioPortal installation') - parser.add_argument('-P', '--portal_properties', type=str, - help='portal.properties file path (default: assumed hg19)', + parser.add_argument('-species', '--species', type=str, default='human', + help='species information (default: assumed human)', required=False) + parser.add_argument('-genome', '--reference_genome', type=str, default='hg19', + help='reference genome build (default: assumed hg19)', + required=False) + parser.add_argument('-build', '--genome_build', type=str, default='37', + help='reference genome build (default: assumed 37 for reference genome hg19)', + required=False) parser.add_argument('-html', '--html_table', type=str, required=False, help='path to html report output file') parser.add_argument('-e', '--error_file', type=str, required=False, @@ -4921,9 +4940,11 @@ def main_validate(args): else: portal_instance = load_portal_info(server_url, logger) - if args.portal_properties: - portal_instance.load_genome_info(args.portal_properties) - + # specify species and genomic information + portal_instance.species = args.species + portal_instance.genome_build = args.reference_genome + portal_instance.ncbi_build = args.genome_build + validate_study(study_dir, portal_instance, logger, relaxed_mode, strict_maf_checks) if html_handler is not None: diff --git a/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java b/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java index 596d70eb6ee..8a20dc5de01 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java +++ b/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java @@ 
-40,6 +40,7 @@ import java.io.IOException; import java.util.ArrayList; +import org.mskcc.cbio.portal.model.ReferenceGenome; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import org.springframework.test.context.transaction.TransactionConfiguration; @@ -129,14 +130,17 @@ public void testDaoCancerStudy2() throws DaoException, IOException { CancerStudy cancerStudy1 = new CancerStudy("GBM public study x", "GBM Description", "tcga_gbm1", "brca", true); + cancerStudy1.setReferenceGenome(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); DaoCancerStudy.addCancerStudy(cancerStudy1); CancerStudy cancerStudy2 = new CancerStudy("GBM private study x", "GBM Description 2", "tcga_gbm2", "brca", false); + cancerStudy2.setReferenceGenome(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); DaoCancerStudy.addCancerStudy(cancerStudy2); CancerStudy cancerStudy3 = new CancerStudy("Breast", "Breast Description", "tcga_gbm3", "brca", false); + cancerStudy3.setReferenceGenome(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); DaoCancerStudy.addCancerStudy(cancerStudy3); ArrayList list = DaoCancerStudy.getAllCancerStudies(); diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java index df3f3fcca3e..374783e4570 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java @@ -40,7 +40,10 @@ import java.io.File; import org.mskcc.cbio.portal.dao.DaoGeneOptimized; +import org.mskcc.cbio.portal.dao.DaoReferenceGenomeGene; import org.mskcc.cbio.portal.model.CanonicalGene; +import org.mskcc.cbio.portal.model.ReferenceGenome; +import org.mskcc.cbio.portal.model.ReferenceGenomeGene; import org.mskcc.cbio.portal.util.ProgressMonitor; import org.springframework.test.context.ContextConfiguration; import 
org.springframework.test.context.junit4.SpringJUnit4ClassRunner; @@ -65,11 +68,11 @@ public void testImportGeneData() throws Exception { DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); ProgressMonitor.setConsoleMode(false); + /* those isoforms from MSKCC clinical bioinformatics pipeline need to be manually added File file = new File("src/test/resources/supp-genes.txt"); - - ImportGeneData.importSuppGeneData(file); + ImportGeneData.importSuppGeneData(file, ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD);*/ - file = new File("src/test/resources/genes_test.txt"); + File file = new File("src/test/resources/genes_test.txt"); ImportGeneData.importData(file); CanonicalGene gene = daoGene.getGene(10); diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java new file mode 100644 index 00000000000..d269ae7f61f --- /dev/null +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2015 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package org.mskcc.cbio.portal.scripts; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import static org.junit.Assert.assertEquals; + +import java.io.File; + +import org.mskcc.cbio.portal.dao.DaoReferenceGenome; +import org.mskcc.cbio.portal.model.ReferenceGenome; +import org.mskcc.cbio.portal.util.ProgressMonitor; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.test.context.transaction.TransactionConfiguration; +import org.springframework.transaction.annotation.Transactional; + +/** + * JUnit tests for ImportReferenceGenome class. + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" }) +@TransactionConfiguration(transactionManager = "transactionManager", defaultRollback = true) +@Transactional +public class TestImportReferenceGenome { + + @Test + /* + * Checks that ImportReferenceGenome can import reference_genomes.txt and that the + * GRCh37 genome (internal id 1) is retrievable by id and by name afterwards.
+ */ + public void testImportReferenceGenome() throws Exception { + + ProgressMonitor.setConsoleMode(false); + + File file = new File("src/test/resources/reference_genomes.txt"); + + ImportReferenceGenome.importData(file); + + ReferenceGenome genome = DaoReferenceGenome.getReferenceGenomeByInternalId(1); + assertEquals("GRCh37", genome.getBuildName()); + assertEquals(1, DaoReferenceGenome.getReferenceGenomeIdByName("GRCh37")); + + + } + +} \ No newline at end of file diff --git a/core/src/test/resources/cancer_study.txt b/core/src/test/resources/cancer_study.txt index 6a1e5025416..5c88e1435e5 100644 --- a/core/src/test/resources/cancer_study.txt +++ b/core/src/test/resources/cancer_study.txt @@ -2,4 +2,5 @@ cancer_study_identifier: test_brca type_of_cancer: brca name: Breast Cancer study 1 description: Breast cancer data. Description to be added. -short_name: Breast (Test) \ No newline at end of file +short_name: Breast (Test) +reference_genome: hg19 \ No newline at end of file diff --git a/core/src/test/resources/reference_genomes.txt b/core/src/test/resources/reference_genomes.txt new file mode 100644 index 00000000000..5bdc713cc2d --- /dev/null +++ b/core/src/test/resources/reference_genomes.txt @@ -0,0 +1,4 @@ +#species name build_name nonN_bases URL release_date +#human hg19 GRCh37 2897310462 http://hgdownload.cse.ucsc.edu/goldenPath/hg19 2009-02-01 00:00:00 +#human hg38 GRCh38 3049315783 http://hgdownload.cse.ucsc.edu/goldenPath/hg38 2013-12-24 00:00:00 +mouse mm10 GRCm38 2652783500 http://hgdownload.cse.ucsc.edu/goldenPath/mm10 2011-12-01 00:00:00 \ No newline at end of file diff --git a/core/src/test/resources/seed_mini.sql b/core/src/test/resources/seed_mini.sql index 3159386771c..019cbdc6c8d 100644 --- a/core/src/test/resources/seed_mini.sql +++ b/core/src/test/resources/seed_mini.sql @@ -95,73 +95,63 @@ INSERT INTO "type_of_cancer" ("TYPE_OF_CANCER_ID","NAME","CLINICAL_TRIAL_KEYWORD INSERT INTO "type_of_cancer" 
("TYPE_OF_CANCER_ID","NAME","CLINICAL_TRIAL_KEYWORDS","DEDICATED_COLOR","SHORT_NAME","PARENT") VALUES ('bpdcn','Blastic Plasmacytoid Dendritic Cell Neoplasm','blastic plasmacytoid dendritic cell neoplasm','LightSalmon','BPDCN','tissue'); INSERT INTO "type_of_cancer" ("TYPE_OF_CANCER_ID","NAME","CLINICAL_TRIAL_KEYWORDS","DEDICATED_COLOR","SHORT_NAME","PARENT") VALUES ('brca','Breast Invasive Carcinoma','breast,breast invasive','HotPink','Breast','tissue'); +-- reference_genome +INSERT INTO `reference_genome` VALUES (1, 'human', 'hg19', 'GRCh37', 2897310462, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01'); +INSERT INTO `reference_genome` VALUES (2, 'human', 'hg38', 'GRCh38', 3049315783, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01'); + -- cancer_study -INSERT INTO "cancer_study" ("CANCER_STUDY_ID", "CANCER_STUDY_IDENTIFIER", "TYPE_OF_CANCER_ID", "NAME", "SHORT_NAME", "DESCRIPTION", "PUBLIC", "PMID", "CITATION", "GROUPS") -VALUES (1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897,26451490','TCGA, Nature 2012, ...','SU2C-PI3K;PUBLIC;GDAC'); +INSERT INTO "cancer_study" ("CANCER_STUDY_ID", "CANCER_STUDY_IDENTIFIER", "TYPE_OF_CANCER_ID", "NAME", "SHORT_NAME", "DESCRIPTION", "PUBLIC", "PMID", "CITATION", "GROUPS","REFERENCE_GENOME_ID") +VALUES (1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897','TCGA, Nature 2012','SU2C-PI3K;PUBLIC;GDAC',1); -- gene as genetic_entity INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id, 207,'AKT1','protein-coding','14q32.32'); -INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); -SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,208,'AKT2','protein-coding','19q13.1-q13.2'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id, 207,'AKT1','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,10000,'AKT3','protein-coding','1q44'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,208,'AKT2','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,369,'ARAF','protein-coding','Xp11.4-p11.2'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,10000,'AKT3','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,472,'ATM','protein-coding','11q22-q23'); +INSERT INTO "gene" 
("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,369,'ARAF','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,673,'BRAF','protein-coding','7q34'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,472,'ATM','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,672,'BRCA1','protein-coding','17q21'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,673,'BRAF','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,675,'BRCA2','protein-coding','13q12.3'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,672,'BRCA1','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,3265,'HRAS','protein-coding','11p15.5'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,675,'BRCA2','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" 
("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,3845,'KRAS','protein-coding','12p12.1'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,3265,'HRAS','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,4893,'NRAS','protein-coding','1p13.2'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,3845,'KRAS','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,51259,'TMEM216','protein-coding','11q13.1'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,4893,'NRAS','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,282770,'OR10AG1','protein-coding','11q11'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,51259,'TMEM216','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,983,'CDK1','protein-coding','10q21.1'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,282770,'OR10AG1','protein-coding'); INSERT INTO 
"genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,8085,'KMT2D','protein-coding','12q13.12'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,983,'CDK1','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); --- add genes for structural variant events SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,57670,'KIAA1549','protein-coding','7q34'); +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,8085,'KMT2D','protein-coding'); INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,27436,'EML4','protein-coding','2p21'); -INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); -SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,238,'ALK','protein-coding','2p23.2-p23.1'); -INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); -SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES (@max_entity_id,2115,'ETV1','protein-coding','7p21.2'); -INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); -SET @max_entity_id = (Select MAX(ID) from genetic_entity); -INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND") VALUES 
(@max_entity_id,7273,'TTN','protein-coding','2q31.2'); - +INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,31259,'TMEM216','protein-coding'); -- cna_event INSERT INTO "cna_event" ("CNA_EVENT_ID","ENTREZ_GENE_ID","ALTERATION") VALUES (20093,207,-2); INSERT INTO "cna_event" ("CNA_EVENT_ID","ENTREZ_GENE_ID","ALTERATION") VALUES (20092,207,2); @@ -234,20 +224,15 @@ INSERT INTO "gene_alias" ("ENTREZ_GENE_ID","GENE_ALIAS") VALUES (3845,'KRAS2'); INSERT INTO "gene_alias" ("ENTREZ_GENE_ID","GENE_ALIAS") VALUES (4893,'N-ras'); INSERT INTO "gene_alias" ("ENTREZ_GENE_ID","GENE_ALIAS") VALUES (4893,'NCMS'); -INSERT INTO `reference_genome` VALUES (1, 'human', 'hg19', 'GRCh37', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01 00:00:00'); -INSERT INTO `reference_genome` VALUES (2, 'human', 'hg38', 'GRCh38', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01 00:00:00'); -INSERT INTO `reference_genome` VALUES (3, 'mouse', 'mm10', 'GRCm38', NULL, 'http://hgdownload.cse.ucsc.edu//goldenPath/mm10/bigZips', '2012-01-01 00:00:00'); - INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(207,'14q32.33',10838,105235686,105262088,14,1); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(207,'14q32.33',10838,104769349,104795751,14,2); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',0, 40736224, 40791443,19,1); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',0, 40230317, 40285536,19,2); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(51259,'11q12.2',0, 61159159, 61166335,11,1); -INSERT INTO reference_genome_gene 
(ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(51259,'11q12.2',0, 61391687, 61398863,11,2); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(282770,'11q12.1',0, 55734975, 55735990,11,1); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(282770,'11q12.1',0, 55967558, 55968463,11,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(207,'14q32.33',11162,104769349,104795751,14,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',15035, 40736224, 40791443,19,1); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',15035, 40230317, 40285536,19,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(51259,'11q12.2',2364, 61159159, 61166335,11,1); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(51259,'11q12.2',2364, 61391687, 61398863,11,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(282770,'11q12.1',2814, 55734975, 55735990,11,1); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(282770,'11q12.1',2814, 55967558, 55968463,11,2); --- genetic_profile INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID", "STABLE_ID", "CANCER_STUDY_ID", "GENETIC_ALTERATION_TYPE", "DATATYPE", "NAME", "DESCRIPTION", "SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (2,'study_tcga_pub_gistic',1,'COPY_NUMBER_ALTERATION','DISCRETE','Putative copy-number alterations from GISTIC','Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.','1'); INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID", "STABLE_ID", "CANCER_STUDY_ID", "GENETIC_ALTERATION_TYPE", "DATATYPE", "NAME", "DESCRIPTION", "SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (3,'study_tcga_pub_mrna',1,'MRNA_EXPRESSION','Z-SCORE','mRNA expression (microarray)','Expression levels (Agilent microarray).','0'); INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID", "STABLE_ID", "CANCER_STUDY_ID", "GENETIC_ALTERATION_TYPE", "DATATYPE", "NAME", "DESCRIPTION", "SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (4,'study_tcga_pub_log2CNA',1,'COPY_NUMBER_ALTERATION','LOG2-VALUE','Log2 copy-number values','Log2 copy-number values for each gene (from Affymetrix SNP6).','0'); diff --git a/core/src/test/resources/testCancerStudy.txt b/core/src/test/resources/testCancerStudy.txt index f08a676d595..e364692dede 100644 --- a/core/src/test/resources/testCancerStudy.txt +++ b/core/src/test/resources/testCancerStudy.txt @@ -2,3 +2,4 @@ type_of_cancer: GBM cancer_study_identifier: tcga_gbm name: Glioblastoma (TCGA) description: The Cancer Genome Atlas (TCGA) Glioblastoma project. 206 primary glioblastoma samples.
Nature 2008. Raw data via the TCGA Data Portal. +reference_genome: hg19 diff --git a/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql b/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql new file mode 100644 index 00000000000..a860fbaa134 --- /dev/null +++ b/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql @@ -0,0 +1,16 @@ +DROP PROCEDURE IF EXISTS adjust_col_size_to_utf8; +DELIMITER $$ +CREATE PROCEDURE adjust_col_size_to_utf8() BEGIN + IF ((SELECT MAX(LENGTH(TUMOR_SEQ_ALLELE)) FROM mutation_event) < 256 + AND (SELECT MAX(LENGTH(REFERENCE_ALLELE)) FROM mutation_event) < 256) + THEN + ALTER TABLE mutation_event + MODIFY REFERENCE_ALLELE varchar(255), + MODIFY TUMOR_SEQ_ALLELE varchar(255), + MODIFY MUTATION_TYPE varchar(64), + MODIFY LINK_XVAR varchar(255), + MODIFY LINK_PDB varchar(255), + MODIFY LINK_MSA varchar(255); + END IF; +END$$ +DELIMITER ; \ No newline at end of file diff --git a/db-scripts/src/main/resources/cgds.sql b/db-scripts/src/main/resources/cgds.sql index 7719cbab983..8b93c29d8b5 100644 --- a/db-scripts/src/main/resources/cgds.sql +++ b/db-scripts/src/main/resources/cgds.sql @@ -113,6 +113,20 @@ CREATE TABLE `type_of_cancer` ( PRIMARY KEY (`TYPE_OF_CANCER_ID`) ); +-- -------------------------------------------------------- +CREATE TABLE `reference_genome` ( + `REFERENCE_GENOME_ID` int(4) NOT NULL AUTO_INCREMENT, + `SPECIES` varchar(64) NOT NULL, + `NAME` varchar(64) NOT NULL, + `BUILD_NAME` varchar(64) NOT NULL, + `GENOME_SIZE` bigint(20) NULL, + `URL` varchar(256) NOT NULL, + `RELEASE_DATE` datetime DEFAULT NULL, + PRIMARY KEY (`REFERENCE_GENOME_ID`), + UNIQUE INDEX `BUILD_NAME_UNIQUE` (`BUILD_NAME` ASC), + CHECK(`SPECIES` = 'human') +); + -- -------------------------------------------------------- CREATE TABLE `cancer_study` ( `CANCER_STUDY_ID` int(11) NOT NULL auto_increment, @@ -127,9 +141,11 @@ CREATE TABLE `cancer_study` ( `GROUPS` varchar(200) DEFAULT NULL, `STATUS` int(1) DEFAULT NULL, `IMPORT_DATE` datetime 
DEFAULT NULL, + `REFERENCE_GENOME_ID` int(4) DEFAULT 1, PRIMARY KEY (`CANCER_STUDY_ID`), UNIQUE (`CANCER_STUDY_IDENTIFIER`), - FOREIGN KEY (`TYPE_OF_CANCER_ID`) REFERENCES `type_of_cancer` (`TYPE_OF_CANCER_ID`) + FOREIGN KEY (`TYPE_OF_CANCER_ID`) REFERENCES `type_of_cancer` (`TYPE_OF_CANCER_ID`), + FOREIGN KEY (`REFERENCE_GENOME_ID`) REFERENCES `reference_genome` (`REFERENCE_GENOME_ID`) ON DELETE RESTRICT ); -- -------------------------------------------------------- @@ -209,7 +225,6 @@ CREATE TABLE `gene` ( `HUGO_GENE_SYMBOL` varchar(255) NOT NULL, `GENETIC_ENTITY_ID` int(11) NOT NULL, `TYPE` varchar(50), - `CYTOBAND` varchar(64), PRIMARY KEY (`ENTREZ_GENE_ID`), UNIQUE KEY `GENETIC_ENTITY_ID_UNIQUE` (`GENETIC_ENTITY_ID`), KEY `HUGO_GENE_SYMBOL` (`HUGO_GENE_SYMBOL`), @@ -290,19 +305,6 @@ CREATE TABLE `uniprot_id_mapping` ( FOREIGN KEY (`ENTREZ_GENE_ID`) REFERENCES `gene` (`ENTREZ_GENE_ID`) ); --- -------------------------------------------------------- -CREATE TABLE `reference_genome` ( - `REFERENCE_GENOME_ID` int(4) NOT NULL AUTO_INCREMENT, - `SPECIES` varchar(64) NOT NULL, - `NAME` varchar(64) NOT NULL, - `BUILD_NAME` varchar(64) NOT NULL, - `GENOME_SIZE` bigint(20) NULL, - `URL` varchar(256) NOT NULL, - `RELEASE_DATE` datetime DEFAULT NULL, - PRIMARY KEY (`REFERENCE_GENOME_ID`), - UNIQUE INDEX `BUILD_NAME_UNIQUE` (`BUILD_NAME` ASC) -); - -- -------------------------------------------------------- CREATE TABLE `genetic_profile` ( `GENETIC_PROFILE_ID` int(11) NOT NULL AUTO_INCREMENT, @@ -820,7 +822,7 @@ CREATE TABLE `clinical_event_data` ( CREATE TABLE `reference_genome_gene` ( `ENTREZ_GENE_ID` int(11) NOT NULL, `REFERENCE_GENOME_ID` int(4) NOT NULL, - `CHR` varchar(4) DEFAULT NULL, + `CHR` varchar(5) DEFAULT NULL, `CYTOBAND` varchar(64) DEFAULT NULL, `EXONIC_LENGTH` int(11) DEFAULT NULL, `START` bigint(20) DEFAULT NULL, diff --git a/db-scripts/src/main/resources/migration.sql b/db-scripts/src/main/resources/migration.sql index 67de6db1ddf..ac2fac60c5c 100644 --- 
a/db-scripts/src/main/resources/migration.sql +++ b/db-scripts/src/main/resources/migration.sql @@ -396,12 +396,12 @@ CREATE TABLE `reference_genome` ( UNIQUE INDEX `BUILD_NAME_UNIQUE` (`BUILD_NAME` ASC) ); -INSERT INTO `reference_genome` -VALUES (1, 'human', 'hg19', 'GRCh37', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01'); -INSERT INTO `reference_genome` -VALUES (2, 'human', 'hg38', 'GRCh38', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01'); -INSERT INTO `reference_genome` -VALUES (3, 'mouse', 'mm10', 'GRCm38', NULL, 'http://hgdownload.cse.ucsc.edu//goldenPath/mm10/bigZips', '2012-01-01'); +INSERT INTO `reference_genome` +VALUES (1, 'human', 'hg19', 'GRCh37', 2897310462, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01'); +INSERT INTO `reference_genome` +VALUES (2, 'human', 'hg38', 'GRCh38', 3049315783, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01'); +INSERT INTO `reference_genome` +VALUES (3, 'mouse', 'mm10', 'GRCm38', 2652783500, 'http://hgdownload.cse.ucsc.edu//goldenPath/mm10/bigZips', '2012-01-01'); CREATE TABLE `reference_genome_gene` ( `ENTREZ_GENE_ID` int(11) NOT NULL, diff --git a/docs/File-Formats.md b/docs/File-Formats.md index 39fcc625107..6e3bacd1702 100644 --- a/docs/File-Formats.md +++ b/docs/File-Formats.md @@ -622,7 +622,7 @@ The extended MAF format recognized by the portal has: 1. **Hugo_Symbol (Required)**: A [HUGO](http://www.genenames.org/) gene symbol. 2. **Entrez_Gene_Id (Optional, but recommended)**: A [Entrez Gene](http://www.ncbi.nlm.nih.gov/gene) identifier. 3. **Center (Optional)**: The sequencing center. -4. **NCBI_Build (Optional)1**: Must be "GRCh37" for human, and "GRCm38" for mouse. +4. **NCBI_Build (Required)1**: The Genome Reference Consortium Build is used by a variant calling software. It must be "GRCh37" or "GRCh38" for a human, and "GRCm38" for a mouse. 5. **Chromosome (Optional)**: A chromosome number, e.g., "7". 6. 
**Start_Position (Optional)**: Start position of event. 7. **End_Position (Optional)**: End position of event. diff --git a/docs/Import-reference-genome.md b/docs/Import-reference-genome.md new file mode 100644 index 00000000000..6d7a0da7ddf --- /dev/null +++ b/docs/Import-reference-genome.md @@ -0,0 +1,49 @@ +# Add a new reference genome to the database +This manual is intended to guide you through loading new reference genome(s) into the database. + +### What is a Reference Genome (also known as a reference assembly)? +A digital nucleic acid sequence database assembled by scientists as a representative example of a species' set of genes. Reference genomes can be accessed online using +dedicated browsers such as [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgGateway). + +### Why do reference genomes matter? +Mutation and Segment plots on Patient View use genomic coordinates. The **cytoband** column in the Mutated Gene table on Study view relies on the Genome Reference Consortium Build +matching the version used to analyse the original data (before it was loaded into cBioPortal DB). + +## How to load reference genome(s) to the database? + +### 1. Prepare a tab-delimited txt file with the following fields: +1. species: the group of organisms, e.g. human +2. name: the name of the reference genome as used by the UCSC browser, e.g. hg19 +3. build_name: the version of the Genome Reference Consortium Build published by NCBI, e.g. GRCh38 +4. nonN_bases: the total number of non-N bases in the reference genome FASTA-formatted file +5. URL: the URL to download the reference genome +6. release_date: the date the reference genome was released, in the format yyyy-mm-dd. The release date is normally included in the README.txt file in the download directory.
+ +Here is a sample reference genome file: +``` +#species name build_name nonN_bases URL release_date +human hg19 GRCh37 2897310462 http://hgdownload.cse.ucsc.edu/goldenPath/hg19 2009-02-01 +human hg38 GRCh38 3049315783 http://hgdownload.cse.ucsc.edu/goldenPath/hg38 2013-12-24 +mouse mm10 GRCm38 2652783500 http://hgdownload.cse.ucsc.edu/goldenPath/mm10 2011-12-01 +``` +### 2. Use the import script to load reference genome(s) data into the database: +``` + cd /core/src/main/scripts + export PORTAL_HOME= +./importReferenceGenome.pl --ref-genome +``` +### 3. Output when the script runs successfully +``` +$ ./importReferenceGenome.pl --ref-genome ~/myspace/cbioportal/core/src/test/resources/reference_genomes.txt + Reading reference genome from: /Users/kelsyzhu/myspace/cbioportal/core/src/test/resources/reference_genomes.txt + --> total number of lines: 3 + + Done. Restart tomcat to make sure the cache is replaced with the new data. + + Warnings / Errors: + ------------------- + 0. New reference genome added; 3x + Done. + Total time: 3432 ms + +``` \ No newline at end of file diff --git a/docs/Updating-gene-and-gene_alias-tables.md b/docs/Updating-gene-and-gene_alias-tables.md index 5f95a126a6c..082c8ff03eb 100644 --- a/docs/Updating-gene-and-gene_alias-tables.md +++ b/docs/Updating-gene-and-gene_alias-tables.md @@ -42,9 +42,18 @@ ALTER TABLE `geneset` AUTO_INCREMENT = 1; 5- To import gene data type the following commands when in the folder `/core/src/main/scripts`: ``` export PORTAL_HOME= -./importGenes.pl --genes +export JAVA_HOME= +./importGenes.pl --genes --gtf --genome-build +``` +**IMPORTANT NOTE**: +1. The **reference_genome** table needs to be populated before updating the **gene** table. Further details can be found in [this document](Import-reference-genome.md). +2. Use the **--species** option when importing genes for a species other than human +3. Use the **gene** table if you query information such as hugo symbols, types of the gene +4.
Use **reference_genome_gene** table if you query information such as chromosome, cytoband, exonic length, or the start or end of the gene +5. Load genes only to the **reference_genome_gene** table without updating the **gene** table, please use the following command: +``` +./importGenes.pl --gtf --genome-build ``` - 6- :warning: Check the `gene` and `gene_alias` tables to verify that they are filled correctly. ```sql SELECT count(*) FROM cbioportal.gene; diff --git a/docs/Using-the-dataset-validator.md b/docs/Using-the-dataset-validator.md index 34a48f6a3c1..830927fd053 100644 --- a/docs/Using-the-dataset-validator.md +++ b/docs/Using-the-dataset-validator.md @@ -38,8 +38,12 @@ optional arguments: -n, --no_portal_checks Skip tests requiring information from the cBioPortal installation - -P PORTAL_PROPERTIES, --portal_properties PORTAL_PROPERTIES - portal.properties file path (default: assumed hg19) + -species SPECIES, --species SPECIES + species information (default: assumed human) + -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME + reference genome build (default: assumed hg19) + -build GENOME_BUILD, --genome_build GENOME_BUILD + reference genome build (default: assumed 37 for reference genome hg19) -html HTML_TABLE, --html_table HTML_TABLE path to html report output file -e ERROR_FILE, --error_file ERROR_FILE @@ -61,7 +65,10 @@ optional arguments: reporting "GeneA, GeneB, GeneC, 213 more" ``` -For more information on the `--portal_info_dir` option, see [Offline validation](#offline-validation) below. If your cBioPortal is not using `hg19`, you must use the `--portal_properties` option. For more information, see [Validation of non-human data](#validation-of-non-human-data). +For more information on the `--portal_info_dir` option, see [Offline validation](#offline-validation) below. If your cBioPortal is not using `hg19`, +you must use all three `--species` , `--reference_genome` , `--genome_build` options. 
+ +For more information, see [Validation of non-human data](#validation-of-non-human-data). When running the validator with parameter `-r` the validator will run the validation of the clinical data it will ignore all failing checks about values in the headers of the clinical data file. @@ -421,25 +428,25 @@ Validation of study succeeded with warnings. ``` ## Validation of non-human data ## -When importing a study, the validator assumes by default that the following parameters from `portal.properties` are set to: +When importing a study, the validator assumes by default that the following three parameters +`--species` , `--reference_genome` , `--genome_build` are set to the following: ``` -species=human -ncbi.build=37 -ucsc.build=hg19 +--species human +--reference_genome hg19 +--genome_build 37 ``` cBioPortal is gradually introducing support for mouse. If you want to load mouse studies and you have [set up your database for mouse](Import-the-Seed-Database.md#download-the-cbioportal-database), you should set the previous parameters to: ``` -species=mouse -ncbi.build=38 -ucsc.build=mm10 +--species mouse +--reference_genome mm10 +--genome_build 38 ``` -If your `portal.properties` does not have the default (human) settings, you should introduce a new parameter `-P` in your command. This parameter should point to either `portal.properties` or a file which contains the new global variables. 
- -As an example, the command for the "Example 1" listed above incorporating the `-P` parameter is given: +As an example, the command for the mouse example using the three parameters is given: ``` ./validateData.py -s ../../../test/scripts/test_data/study_es_0/ -P ../../../../../src/main/resources/portal.properties -u http://localhost:8080 -v +./validateData.py -s ../../../test/scripts/test_data/study_es_0/ --species mouse --reference_genome mm10 --genome_build 38 -u http://localhost:8080/cbioportal -v ``` ## Running the validator for multiple studies @@ -474,8 +481,12 @@ optional arguments: -n, --no_portal_checks Skip tests requiring information from the cBioPortal installation - -P PORTAL_PROPERTIES, --portal_properties PORTAL_PROPERTIES - portal.properties file path (default: assumed hg19) + -species SPECIES, --species SPECIES + species information (default: assumed human) + -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME + reference genome build (default: assumed hg19) + -build GENOME_BUILD, --genome_build GENOME_BUILD + reference genome build (default: assumed 37 for reference genome hg19) -m, --strict_maf_checks Option to enable strict mode for validator when validating mutation data diff --git a/docs/Using-the-metaImport-script.md b/docs/Using-the-metaImport-script.md index 79494e617b8..f799850bed2 100644 --- a/docs/Using-the-metaImport-script.md +++ b/docs/Using-the-metaImport-script.md @@ -10,10 +10,11 @@ and then run the following command: ``` This will tell you the parameters you can use: ``` -$ ./metaImport.py -h + $./metaImport.py -h usage: metaImport.py [-h] -s STUDY_DIRECTORY [-u URL_SERVER | -p PORTAL_INFO_DIR | -n] - [-P PORTAL_PROPERTIES] [-jar JAR_PATH] [-html HTML_TABLE] + [-species SPECIES] [-genome REFERENCE_GENOME] + [-build GENOME_BUILD] [-jar JAR_PATH] [-html HTML_TABLE] [-v] [-o] [-r] [-m] [-a MAX_REPORTED_VALUES] cBioPortal meta Importer @@ -31,8 +32,12 @@ optional arguments: -n, --no_portal_checks Skip tests requiring 
information from the cBioPortal installation - -P PORTAL_PROPERTIES, --portal_properties PORTAL_PROPERTIES - portal.properties file path (default: assumed hg19) + -species SPECIES, --species SPECIES + species information (default: assumed human) + -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME + reference genome build (default: assumed hg19) + -build GENOME_BUILD, --genome_build GENOME_BUILD + reference genome build (default: assumed 37) -jar JAR_PATH, --jar_path JAR_PATH Path to scripts JAR file (default: locate it relative to the import script) @@ -41,6 +46,9 @@ optional arguments: -v, --verbose report status info messages while validating -o, --override_warning override warnings and continue importing + -r, --relaxed_clinical_definitions + Option to enable relaxed mode for validator when + validating clinical data without header definitions -m, --strict_maf_checks Option to enable strict mode for validator when validating mutation data diff --git a/model/src/main/java/org/cbioportal/model/CancerStudy.java b/model/src/main/java/org/cbioportal/model/CancerStudy.java index 0389dd27306..a3d41bd0812 100644 --- a/model/src/main/java/org/cbioportal/model/CancerStudy.java +++ b/model/src/main/java/org/cbioportal/model/CancerStudy.java @@ -30,6 +30,7 @@ public class CancerStudy implements Serializable { private Integer methylationHm27SampleCount; private Integer rppaSampleCount; private Integer completeSampleCount; + private String referenceGenome; public Integer getCancerStudyId() { return cancerStudyId; @@ -214,4 +215,8 @@ public Integer getCompleteSampleCount() { public void setCompleteSampleCount(Integer completeSampleCount) { this.completeSampleCount = completeSampleCount; } + + public String getReferenceGenome() { return referenceGenome; } + + public void setReferenceGenome(String referenceGenome) { this.referenceGenome = referenceGenome; } } \ No newline at end of file diff --git a/model/src/main/java/org/cbioportal/model/Gene.java 
b/model/src/main/java/org/cbioportal/model/Gene.java index 2a589142071..751dc7593fe 100644 --- a/model/src/main/java/org/cbioportal/model/Gene.java +++ b/model/src/main/java/org/cbioportal/model/Gene.java @@ -5,14 +5,18 @@ public class Gene implements Serializable { + @NotNull + private Integer geneticEntityId; @NotNull private Integer entrezGeneId; @NotNull private String hugoGeneSymbol; private String type; - private String cytoband; - private String chromosome; + public Integer getGeneticEntityId() { return geneticEntityId; } + + public void setGeneticEntityId(Integer geneticEntityId) { this.geneticEntityId = geneticEntityId; } + public Integer getEntrezGeneId() { return entrezGeneId; } @@ -36,20 +40,4 @@ public String getType() { public void setType(String type) { this.type = type; } - - public String getCytoband() { - return cytoband; - } - - public void setCytoband(String cytoband) { - this.cytoband = cytoband; - } - - public String getChromosome() { - return chromosome; - } - - public void setChromosome(String chromosome) { - this.chromosome = chromosome; - } -} \ No newline at end of file +} diff --git a/model/src/main/java/org/cbioportal/model/Mutation.java b/model/src/main/java/org/cbioportal/model/Mutation.java index cf4b78a75b1..1f4296ebb9f 100644 --- a/model/src/main/java/org/cbioportal/model/Mutation.java +++ b/model/src/main/java/org/cbioportal/model/Mutation.java @@ -14,6 +14,7 @@ public class Mutation extends Alteration implements Serializable { private Integer normalAltCount; private Integer normalRefCount; private String aminoAcidChange; + private String chr; private Long startPosition; private Long endPosition; private String referenceAllele; @@ -99,6 +100,10 @@ public String getAminoAcidChange() { public void setAminoAcidChange(String aminoAcidChange) { this.aminoAcidChange = aminoAcidChange; } + + public String getChr() { return chr; } + + public void setChr(String chr) { this.chr = chr; } public Long getStartPosition() { return startPosition; 
diff --git a/model/src/main/java/org/cbioportal/model/ReferenceGenome.java b/model/src/main/java/org/cbioportal/model/ReferenceGenome.java new file mode 100644 index 00000000000..287b17272ef --- /dev/null +++ b/model/src/main/java/org/cbioportal/model/ReferenceGenome.java @@ -0,0 +1,176 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package org.cbioportal.model; + +import java.util.Date; + + +/** + * This represents the reference genome used by molecular profiling + * + * @author Kelsey Zhu + */ +public class ReferenceGenome { + + private int referenceGenomeId; // assigned by DB, auto increment sequence number + private String genomeName; + private String species; + private String buildName; //genome assembly name + private long genomeSize; //non-N bases + private String url; + private Date releaseDate; + public static final String HOMO_SAPIENS = "human"; + public static final String MUS_MUSCULUS = "mouse"; + public static final String HOMO_SAPIENS_DEFAULT_GENOME_BUILD = "GRCh37"; + public static final String HOMO_SAPIENS_DEFAULT_GENOME_NAME = "hg19"; + public static final String MUS_MUSCULUS_DEFAULT_GENOME_BUILD = "GRCm38"; + public static final String HOMO_SAPIENS_DEFAULT_GENOME_BUILD_PREFIX = "GRCh"; + public static final String MUS_MUSCULUS_DEFAULT_GENOME_BUILD_PREFIX = "GRCm"; + + /** + * Constructor; genomeSize keeps its default of 0, url and releaseDate stay null. + * @param genomeName Name of the reference genome.
+ * @param species Species of the reference genome. + * @param buildName Name of genome assembly + */ + public ReferenceGenome(String genomeName, String species, String buildName) { + super(); + this.genomeName = genomeName; + this.species = species; + this.buildName = buildName; + } + + /** + * Constructor. + * @param genomeName Name of the reference genome. + * @param species Species of the reference genome. + * @param buildName Name of genome assembly + * @param genomeSize Effective genome size (non-N bases); null is stored as 0 + * @param url URL to download reference genome + * @param releaseDate Date genome assembly released + */ + public ReferenceGenome(String genomeName, String species, String buildName, + Long genomeSize, String url, Date releaseDate) { + super(); + this.genomeName = genomeName; + this.species = species; + this.buildName = buildName; + this.genomeSize = (genomeSize != null) ? genomeSize : 0L; // GENOME_SIZE is nullable in the schema; avoid NPE on unboxing + this.url = url; + this.releaseDate = releaseDate; + } + + public void setReferenceGenomeId(int referenceGenomeId) { + this.referenceGenomeId = referenceGenomeId; + } + + public int getReferenceGenomeId() { + return referenceGenomeId; + } + + public void setGenomeName(String genomeName) { + this.genomeName = genomeName; + } + + public String getGenomeName() { + return this.genomeName; + } + + public void setSpecies(String species) { + this.species = species; + } + + public String getSpecies() { + return this.species; + } + + public void setBuildName(String buildName) { + this.buildName = buildName; + } + + public String getBuildName () { + return this.buildName; + } + + public void setGenomeSize(long genomeSize) { + this.genomeSize = genomeSize; + } + + public long getGenomeSize() { + return this.genomeSize; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getUrl() { + return this.url; + } + + public void setReleaseDate(Date releaseDate) { + this.releaseDate = releaseDate; + } + + public Date getReleaseDate() { + return this.releaseDate; + } + + /** + * Equality is based on genomeName, species and buildName only; the DB-assigned id is ignored.
+ * @param otherReferenceGenome Other Reference Genome. + * @return true if the other object is a ReferenceGenome with the same genomeName, species and buildName; false otherwise. + */ + @Override + public boolean equals(Object otherReferenceGenome) { + if (this == otherReferenceGenome) { + return true; + } + + if (!(otherReferenceGenome instanceof ReferenceGenome)) { + return false; + } + + ReferenceGenome that = (ReferenceGenome) otherReferenceGenome; + return + java.util.Objects.equals(this.genomeName, that.genomeName) && + java.util.Objects.equals(this.species, that.species) && + java.util.Objects.equals(this.buildName, that.buildName); + } + + @Override + public int hashCode() { + int result = 3; + // referenceGenomeId is deliberately excluded: equals() ignores it, and equal objects must hash alike + result = 31 * result + (this.genomeName != null ? this.genomeName.hashCode() : 0); + result = 31 * result + (this.buildName != null ? this.buildName.hashCode() : 0); + result = 31 * result + (this.species != null ? this.species.hashCode() : 0); + return result; + } + + /** + * toString() Override. + * @return string summary of reference genome + */ + @Override + public String toString() { + return "Reference Genome [referenceGenomeID=" + referenceGenomeId + ", genomeName=" + genomeName + ", species=" + + species + ", buildName=" + buildName + "]"; + } + +} \ No newline at end of file diff --git a/model/src/main/java/org/cbioportal/model/ReferenceGenomeGene.java b/model/src/main/java/org/cbioportal/model/ReferenceGenomeGene.java new file mode 100644 index 00000000000..c5f5ecc4094 --- /dev/null +++ b/model/src/main/java/org/cbioportal/model/ReferenceGenomeGene.java @@ -0,0 +1,90 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package org.cbioportal.model; + +import java.io.Serializable; +import java.net.Inet4Address; +import javax.validation.constraints.NotNull; + +/** + * Gene data tied to one reference genome: chromosome, cytoband, length, start and end for an Entrez gene. + * @author Kelsey Zhu + */ +public class ReferenceGenomeGene implements Serializable { + @NotNull + private Integer referenceGenomeId; + @NotNull + private Integer entrezGeneId; + private String hugoGeneSymbol; + private String chromosome; + private String cytoband; + private Integer length; + private Long start; + private Long end; + + public void setReferenceGenomeId(Integer referenceGenomeId) { this.referenceGenomeId = referenceGenomeId; } + + public Integer getReferenceGenomeId() { + return referenceGenomeId; + } + + public Integer getEntrezGeneId() { return entrezGeneId; } + + public void setEntrezGeneId(Integer entrezGeneId) { this.entrezGeneId = entrezGeneId; } + + public String getHugoGeneSymbol() { + return hugoGeneSymbol; + } + + public void setHugoGeneSymbol(String hugoGeneSymbol) { + this.hugoGeneSymbol = hugoGeneSymbol; + } + + public String getChromosome() { + return chromosome; + } + + public void setChromosome(String chromosome) { + this.chromosome = chromosome; + } + + public String getCytoband() { + return cytoband; + } + + public void setCytoband(String cytoband) { + this.cytoband = cytoband; + } + + public Integer getLength() { + return length; + } + + public void setLength(int length) { + this.length = length; + } + + public Long getStart() { return this.start; } + + public void setStart(Long start) { this.start = start; } + + public Long getEnd() { return this.end; } + + public void setEnd(Long end) { this.end = end; } + +} diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/GeneRepository.java
b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/GeneRepository.java index d962e78a325..89e36fde7e4 100644 --- a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/GeneRepository.java +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/GeneRepository.java @@ -43,6 +43,8 @@ List getAllGenes(String keyword, String alias, String projection, Integer String direction); BaseMeta getMetaGenes(String keyword, String alias); + + Gene getGeneByGeneticEntityId(Integer geneticEntityId); Gene getGeneByEntrezGeneId(Integer entrezGeneId); diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java new file mode 100644 index 00000000000..ecf47ee4b7d --- /dev/null +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 Memorial Sloan Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package org.cbioportal.persistence; + +import org.cbioportal.model.ReferenceGenomeGene; +import java.util.List; + +public interface ReferenceGenomeGeneRepository { + List getAllGenesByGenomeName(String genomeName); + List getGenesByHugoGeneSymbolsAndGenomeName(List geneIds, String genomeName); + List getGenesByGenomeName(List geneIds, String genomeName); + ReferenceGenomeGene getReferenceGenomeGene(Integer geneId, String genomeName); + ReferenceGenomeGene getReferenceGenomeGeneByEntityId(Integer geneticEntityId, String genomeName); +} \ No newline at end of file diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMapper.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMapper.java index aa2942a5bad..79cc63318df 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMapper.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMapper.java @@ -13,6 +13,8 @@ List getGenes(String keyword, String alias, String projection, Integer lim BaseMeta getMetaGenes(String keyword, String alias); + Gene getGeneByGeneticEntityId(Integer geneticEntityId, String projection); + Gene getGeneByEntrezGeneId(Integer entrezGeneId, String projection); Gene getGeneByHugoGeneSymbol(String hugoGeneSymbol, String 
projection); diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepository.java index 4a66cbb0d1a..0f5167060d3 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepository.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepository.java @@ -33,6 +33,11 @@ public BaseMeta getMetaGenes(String keyword, String alias) { return geneMapper.getMetaGenes(keyword, alias); } + @Override + public Gene getGeneByGeneticEntityId(Integer geneticEntityId) { + return geneMapper.getGeneByGeneticEntityId(geneticEntityId, PersistenceConstants.DETAILED_PROJECTION); + } + @Override public Gene getGeneByEntrezGeneId(Integer entrezGeneId) { diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java index 4f7da75f679..19de3a25248 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java @@ -18,8 +18,12 @@ public class MolecularDataMyBatisRepository implements MolecularDataRepository { @Override public String getCommaSeparatedSampleIdsOfMolecularProfile(String molecularProfileId) { - - return molecularDataMapper.getCommaSeparatedSampleIdsOfMolecularProfiles(Arrays.asList(molecularProfileId)).get(0); + try { + return molecularDataMapper.getCommaSeparatedSampleIdsOfMolecularProfiles( + Arrays.asList(molecularProfileId)).get(0); + } catch (IndexOutOfBoundsException e) { + return null; + } } @Override diff --git 
a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.java new file mode 100644 index 00000000000..39e7bdb5d50 --- /dev/null +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.java @@ -0,0 +1,13 @@ +package org.cbioportal.persistence.mybatis; + +import org.cbioportal.model.ReferenceGenomeGene; +import java.util.List; + +public interface ReferenceGenomeGeneMapper { + List getAllGenesByGenomeName(String genomeName, String projection); + List getGenesByHugoGeneSymbolsAndGenomeName(List geneIds, String genomeName, String projection); + List getGenesByGenomeName(List geneIds, String genomeName, String projection); + ReferenceGenomeGene getReferenceGenomeGene(Integer geneId, String genomeName, String projection); + ReferenceGenomeGene getReferenceGenomeGeneByEntityId(Integer geneticEntityId, String genomeName, String projection); +} + diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java new file mode 100644 index 00000000000..01d04fe9be4 --- /dev/null +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java @@ -0,0 +1,47 @@ +package org.cbioportal.persistence.mybatis; + + +import java.util.List; + +import org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.persistence.ReferenceGenomeGeneRepository; +import org.cbioportal.persistence.PersistenceConstants; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Repository; + +@Repository +public class ReferenceGenomeGeneMyBatisRepository implements 
ReferenceGenomeGeneRepository { + + @Autowired + private ReferenceGenomeGeneMapper referenceGenomeGeneMapper; + + @Override + public List getAllGenesByGenomeName(String genomeName) { + + return referenceGenomeGeneMapper.getAllGenesByGenomeName(genomeName, PersistenceConstants.SUMMARY_PROJECTION); + } + + @Override + public List getGenesByHugoGeneSymbolsAndGenomeName(List geneIds, String genomeName) { + + return referenceGenomeGeneMapper.getGenesByHugoGeneSymbolsAndGenomeName(geneIds, genomeName, PersistenceConstants.SUMMARY_PROJECTION); + } + + @Override + public List getGenesByGenomeName(List geneIds, String genomeName) { + + return referenceGenomeGeneMapper.getGenesByGenomeName(geneIds, genomeName, PersistenceConstants.SUMMARY_PROJECTION); + } + + @Override + public ReferenceGenomeGene getReferenceGenomeGene(Integer geneId, String genomeName) { + + return referenceGenomeGeneMapper.getReferenceGenomeGene(geneId, genomeName, PersistenceConstants.SUMMARY_PROJECTION); + } + + @Override + public ReferenceGenomeGene getReferenceGenomeGeneByEntityId(Integer geneticEntityId, String genomeName) { + return referenceGenomeGeneMapper.getReferenceGenomeGeneByEntityId(geneticEntityId, genomeName, + PersistenceConstants.SUMMARY_PROJECTION); + } +} diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml index 0fc27018499..e67b9973201 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml @@ -155,7 +155,6 @@ SELECT cna_event.ENTREZ_GENE_ID AS entrezGeneId, gene.HUGO_GENE_SYMBOL AS hugoGeneSymbol, - gene.CYTOBAND AS cytoband, cna_event.ALTERATION AS alteration, COUNT(DISTINCT(sample_cna_event.SAMPLE_ID)) 
AS numberOfAlteredCases FROM cna_event @@ -184,13 +183,16 @@ SELECT cna_event.ENTREZ_GENE_ID AS entrezGeneId, gene.HUGO_GENE_SYMBOL AS hugoGeneSymbol, - gene.CYTOBAND AS cytoband, + reference_genome_gene.CYTOBAND as cytoband, cna_event.ALTERATION AS alteration, COUNT(DISTINCT(sample_cna_event.SAMPLE_ID)) AS numberOfAlteredCases FROM cna_event INNER JOIN sample_cna_event ON cna_event.CNA_EVENT_ID = sample_cna_event.CNA_EVENT_ID INNER JOIN genetic_profile ON sample_cna_event.GENETIC_PROFILE_ID = genetic_profile.GENETIC_PROFILE_ID INNER JOIN gene ON cna_event.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID + INNER JOIN cancer_study ON cancer_study.CANCER_STUDY_ID = genetic_profile.CANCER_STUDY_ID + INNER JOIN reference_genome_gene ON reference_genome_gene.ENTREZ_GENE_ID = cna_event.ENTREZ_GENE_ID + AND reference_genome_gene.reference_genome_id = cancer_study.reference_genome_id WHERE sample_cna_event.SAMPLE_ID IN ( @@ -243,7 +245,6 @@ SELECT cna_event.ENTREZ_GENE_ID AS entrezGeneId, gene.HUGO_GENE_SYMBOL AS hugoGeneSymbol, - gene.CYTOBAND AS cytoband, cna_event.ALTERATION AS alteration, COUNT(DISTINCT(patient.INTERNAL_ID)) AS numberOfAlteredCases FROM cna_event diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/GeneMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/GeneMapper.xml index 84bcd124d58..5942a725612 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/GeneMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/GeneMapper.xml @@ -9,8 +9,7 @@ gene.HUGO_GENE_SYMBOL AS "${prefix}hugoGeneSymbol" , - gene.TYPE AS "${prefix}type", - gene.CYTOBAND AS "${prefix}cytoband" + gene.TYPE AS "${prefix}type" @@ -62,6 +61,15 @@ + + @@ -81,6 +84,7 @@ FROM genetic_profile INNER JOIN cancer_study ON genetic_profile.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON 
cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID genetic_profile.STABLE_ID IN @@ -112,6 +116,7 @@ FROM genetic_profile INNER JOIN cancer_study ON genetic_profile.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID WHERE genetic_profile.GENETIC_PROFILE_ID in (SELECT REFERRED_GENETIC_PROFILE_ID @@ -129,6 +134,7 @@ FROM genetic_profile INNER JOIN cancer_study ON genetic_profile.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID WHERE genetic_profile.GENETIC_PROFILE_ID in (SELECT REFERRING_GENETIC_PROFILE_ID diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/MutationMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/MutationMapper.xml index faa2022b4c8..167921f214c 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/MutationMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/MutationMapper.xml @@ -20,6 +20,7 @@ mutation.NORMAL_ALT_COUNT AS normalAltCount, mutation.NORMAL_REF_COUNT AS normalRefCount, mutation.AMINO_ACID_CHANGE AS aminoAcidChange, + mutation_event.CHR as chr, mutation_event.START_POSITION AS startPosition, mutation_event.END_POSITION AS endPosition, mutation_event.REFERENCE_ALLELE AS referenceAllele, diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/PatientMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/PatientMapper.xml index 514ac0a741e..768e9f3c083 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/PatientMapper.xml +++ 
b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/PatientMapper.xml @@ -20,6 +20,7 @@ FROM patient INNER JOIN cancer_study ON patient.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID @@ -87,6 +88,7 @@ FROM sample INNER JOIN patient ON sample.PATIENT_ID = patient.INTERNAL_ID INNER JOIN cancer_study ON patient.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID WHERE cancer_study.CANCER_STUDY_IDENTIFIER = #{studyIds[0]} AND diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml new file mode 100644 index 00000000000..d13293cacd1 --- /dev/null +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml @@ -0,0 +1,110 @@ + + + + + + + + reference_genome_gene.ENTREZ_GENE_ID AS "${prefix}entrezGeneId", + reference_genome_gene.REFERENCE_GENOME_ID AS "${prefix}referenceGenomeId" + + , + gene.hugo_gene_symbol AS "${prefix}hugoGeneSymbol", + reference_genome_gene.CHR AS "${prefix}chromosome", + reference_genome_gene.CYTOBAND AS "${prefix}cytoband", + reference_genome_gene.EXONIC_LENGTH AS "${prefix}length", + reference_genome_gene.START AS "${prefix}start", + reference_genome_gene.END AS "${prefix}end" + + + + + + + + + + + + diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleListMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleListMapper.xml index 8daeafee23b..9d09f23c0b3 100644 --- 
a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleListMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleListMapper.xml @@ -30,6 +30,7 @@ FROM sample_list INNER JOIN cancer_study ON sample_list.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID WHERE cancer_study.CANCER_STUDY_IDENTIFIER = #{studyId} @@ -61,6 +62,7 @@ FROM sample_list INNER JOIN cancer_study ON sample_list.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID WHERE sample_list.STABLE_ID = #{sampleListId} @@ -71,6 +73,7 @@ FROM sample_list INNER JOIN cancer_study ON sample_list.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID sample_list.STABLE_ID IN diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleMapper.xml index 577e24c0e14..d3f34ec412a 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SampleMapper.xml @@ -26,6 +26,7 @@ FROM sample INNER JOIN patient ON sample.PATIENT_ID = patient.INTERNAL_ID INNER JOIN cancer_study ON patient.CANCER_STUDY_ID = cancer_study.CANCER_STUDY_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SecurityMapper.xml 
b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SecurityMapper.xml index 20e2b0cf4e6..77eb4a73004 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SecurityMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SecurityMapper.xml @@ -41,7 +41,7 @@ diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SignificantlyMutatedGeneMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SignificantlyMutatedGeneMapper.xml index 7564e80900f..b58a5a52270 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SignificantlyMutatedGeneMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/SignificantlyMutatedGeneMapper.xml @@ -9,7 +9,7 @@ mut_sig.CANCER_STUDY_ID AS cancerStudyId, cancer_study.CANCER_STUDY_IDENTIFIER AS cancerStudyIdentifier, gene.HUGO_GENE_SYMBOL AS hugoGeneSymbol, - mut_sig.RANK AS rank, + mut_sig.`RANK` AS `rank`, mut_sig.NumBasesCovered AS numBasesCovered, mut_sig.NumMutations AS numMutations, mut_sig.P_VALUE AS pValue, diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/StudyMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/StudyMapper.xml index d2d9cc22c3a..6ca3d9ab83d 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/StudyMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/StudyMapper.xml @@ -18,7 +18,8 @@ cancer_study.CITATION AS "${prefix}citation", cancer_study.GROUPS AS "${prefix}groups", cancer_study.STATUS AS "${prefix}status", - cancer_study.IMPORT_DATE AS "${prefix}importDate" + cancer_study.IMPORT_DATE AS "${prefix}importDate", + 
reference_genome.NAME AS "${prefix}referenceGenome" @@ -42,6 +43,7 @@ FROM cancer_study INNER JOIN sample_list ON cancer_study.CANCER_STUDY_ID = sample_list.CANCER_STUDY_ID INNER JOIN sample_list_list ON sample_list.LIST_ID = sample_list_list.LIST_ID + INNER JOIN reference_genome ON cancer_study.REFERENCE_GENOME_ID = reference_genome.REFERENCE_GENOME_ID INNER JOIN type_of_cancer ON cancer_study.TYPE_OF_CANCER_ID = type_of_cancer.TYPE_OF_CANCER_ID diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMyBatisRepositoryTest.java index 5b2005df8c9..4130e664298 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMyBatisRepositoryTest.java @@ -1,8 +1,6 @@ package org.cbioportal.persistence.mybatis; -import org.cbioportal.model.CopyNumberCountByGene; -import org.cbioportal.model.DiscreteCopyNumberData; -import org.cbioportal.model.Gene; +import org.cbioportal.model.*; import org.cbioportal.model.meta.BaseMeta; import org.junit.Assert; import org.junit.Test; @@ -24,6 +22,9 @@ public class DiscreteCopyNumberMyBatisRepositoryTest { @Autowired private DiscreteCopyNumberMyBatisRepository discreteCopyNumberMyBatisRepository; + @Autowired + private ReferenceGenomeGeneMyBatisRepository refGeneMyBatisRepository; + @Test public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdSummaryProjection() throws Exception { @@ -62,7 +63,7 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDetailedProjec List result = discreteCopyNumberMyBatisRepository .getDiscreteCopyNumbersInMolecularProfileBySampleListId("study_tcga_pub_gistic", "study_tcga_pub_all", entrezGeneIds, alterations, "DETAILED"); 
- + Assert.assertEquals(3, result.size()); DiscreteCopyNumberData discreteCopyNumberData = result.get(0); Assert.assertEquals("study_tcga_pub_gistic", discreteCopyNumberData.getMolecularProfileId()); @@ -72,8 +73,9 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDetailedProjec Gene gene = discreteCopyNumberData.getGene(); Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); - Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); + ReferenceGenomeGene refGene = refGeneMyBatisRepository.getReferenceGenomeGene(gene.getEntrezGeneId(), "hg19"); + Assert.assertEquals("14q32.33", refGene.getCytoband()); + Assert.assertEquals((Integer) 10838, refGene.getLength()); } @Test diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepositoryTest.java index e5c7b9e948f..9d3aff17b74 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/GeneMyBatisRepositoryTest.java @@ -74,7 +74,6 @@ public void getAllGenesSummaryProjection() throws Exception { Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); } @Test @@ -87,7 +86,6 @@ public void getAllGenesDetailedProjection() throws Exception { Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); } @Test @@ -138,7 +136,6 @@ public void 
getGeneByEntrezGeneId() throws Exception { Assert.assertEquals((Integer) 207, result.getEntrezGeneId()); Assert.assertEquals("AKT1", result.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", result.getType()); - Assert.assertEquals("14q32.32", result.getCytoband()); } @Test @@ -157,7 +154,6 @@ public void getGeneByHugoGeneSymbol() throws Exception { Assert.assertEquals((Integer) 207, result.getEntrezGeneId()); Assert.assertEquals("AKT1", result.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", result.getType()); - Assert.assertEquals("14q32.32", result.getCytoband()); } @Test @@ -210,7 +206,6 @@ public void fetchGenesByEntrezGeneIds() throws Exception { Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); } @Test @@ -227,7 +222,6 @@ public void fetchGenesByHugoGeneSymbols() throws Exception { Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); } @Test diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularProfileMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularProfileMyBatisRepositoryTest.java index 0e3549d149b..680d427e917 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularProfileMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularProfileMyBatisRepositoryTest.java @@ -88,8 +88,8 @@ public void getAllMolecularProfilesDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -165,8 +165,8 @@ public void getMolecularProfile() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MutationMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MutationMyBatisRepositoryTest.java index dfcde1dc5c5..f895d6c763a 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MutationMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MutationMyBatisRepositoryTest.java @@ -157,7 +157,6 @@ public void getMutationsInMolecularProfileBySampleListIdDetailedProjection() thr Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); Assert.assertEquals("protein-coding", gene.getType()); - Assert.assertEquals("14q32.32", gene.getCytoband()); } @Test diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/PatientMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/PatientMyBatisRepositoryTest.java index 1ebbb89e4ad..bfa31f18f39 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/PatientMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/PatientMyBatisRepositoryTest.java @@ -93,8 +93,8 @@ public void 
getAllPatientsInStudyDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -158,8 +158,8 @@ public void getPatientInStudy() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepositoryTest.java new file mode 100644 index 00000000000..6540612ecf9 --- /dev/null +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepositoryTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package org.cbioportal.persistence.mybatis; + +import org.cbioportal.model.ReferenceGenome; +import org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.model.Gene; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Configurable; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; + +import java.util.ArrayList; +import java.util.List; + +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration("/testContextDatabase.xml") +@Configurable +public class ReferenceGenomeGeneMyBatisRepositoryTest { + + @Autowired + private ReferenceGenomeGeneMyBatisRepository refGeneMyBatisRepository; + + @Autowired + private GeneMyBatisRepository geneMyBatisRepository; + + @Test + public void getAllGenesByGenomeName() throws Exception { + + List result = refGeneMyBatisRepository.getAllGenesByGenomeName(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + ReferenceGenomeGene refGene = result.get(0); + Gene gene = geneMyBatisRepository.getGeneByEntrezGeneId(refGene.getEntrezGeneId()); + Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); + Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); + } + + + @Test + public void 
getReferenceGenomeGene() throws Exception { + String genomeName = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + ReferenceGenomeGene refGene = refGeneMyBatisRepository.getReferenceGenomeGene(207, genomeName); + Gene gene = geneMyBatisRepository.getGeneByEntrezGeneId(refGene.getEntrezGeneId()); + Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); + Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); + Assert.assertEquals("14q32.33", refGene.getCytoband()); + } + + @Test + public void getReferenceGenomeGeneByEntityId() throws Exception { + ReferenceGenomeGene refGene = refGeneMyBatisRepository.getReferenceGenomeGeneByEntityId((Integer)2, "hg38"); + Gene gene = geneMyBatisRepository.getGeneByEntrezGeneId(refGene.getEntrezGeneId()); + Assert.assertEquals((Integer) 208, gene.getEntrezGeneId()); + Assert.assertEquals("AKT2", gene.getHugoGeneSymbol()); + Assert.assertEquals("19q13.2", refGene.getCytoband()); + } + + + @Test + public void getGenesByGenomeName() throws Exception { + + List geneIds = new ArrayList<>(); + geneIds.add((Integer)207); + geneIds.add((Integer)208); + + List result = refGeneMyBatisRepository.getGenesByGenomeName(geneIds, ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + Assert.assertEquals(2, result.size()); + ReferenceGenomeGene refGene = result.get(0); + Gene gene = geneMyBatisRepository.getGeneByEntrezGeneId(refGene.getEntrezGeneId()); + Assert.assertEquals((Integer) 207, gene.getEntrezGeneId()); + Assert.assertEquals("AKT1", gene.getHugoGeneSymbol()); + Assert.assertEquals("14q32.33", refGene.getCytoband()); + } + +} + diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleListMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleListMyBatisRepositoryTest.java index 4d913c37c92..83b8f4805f9 100644 --- 
a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleListMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleListMyBatisRepositoryTest.java @@ -80,8 +80,8 @@ public void getAllSampleListsDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -148,8 +148,8 @@ public void getSampleList() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -218,8 +218,8 @@ public void getAllSampleListsInStudyDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleMyBatisRepositoryTest.java index 69ed15989e7..499a1ebf369 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/SampleMyBatisRepositoryTest.java @@ -81,8 +81,8 @@ public void getAllSamplesInStudyDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -152,8 +152,8 @@ public void getSampleInStudy() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } @@ -215,8 +215,8 @@ public void getAllSamplesOfPatientInStudyDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer)0 , cancerStudy.getStatus()); } diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/StudyMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/StudyMyBatisRepositoryTest.java index caa5a192aae..f2149143c37 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/StudyMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/StudyMyBatisRepositoryTest.java @@ -56,8 +56,8 @@ public void getAllStudiesSummaryProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer) 0 , cancerStudy.getStatus()); Assert.assertEquals(simpleDateFormat.parse("2011-12-18 13:17:17+00:00"), cancerStudy.getImportDate()); @@ -84,8 +84,8 @@ public void getAllStudiesDetailedProjection() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer) 0 , cancerStudy.getStatus()); Assert.assertEquals(simpleDateFormat.parse("2011-12-18 13:17:17"), cancerStudy.getImportDate()); @@ -160,8 +160,8 @@ public void getStudy() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", result.getDescription()); Assert.assertEquals(true, result.getPublicStudy()); - Assert.assertEquals("23000897,26451490", result.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", result.getCitation()); + Assert.assertEquals("23000897", result.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", result.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", result.getGroups()); Assert.assertEquals((Integer) 0 , result.getStatus()); Assert.assertEquals(simpleDateFormat.parse("2011-12-18 13:17:17"), result.getImportDate()); @@ -203,8 +203,8 @@ public void fetchStudies() throws Exception { " Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.", cancerStudy.getDescription()); Assert.assertEquals(true, cancerStudy.getPublicStudy()); - Assert.assertEquals("23000897,26451490", cancerStudy.getPmid()); - Assert.assertEquals("TCGA, Nature 2012, ...", cancerStudy.getCitation()); + Assert.assertEquals("23000897", cancerStudy.getPmid()); + Assert.assertEquals("TCGA, Nature 2012", cancerStudy.getCitation()); Assert.assertEquals("SU2C-PI3K;PUBLIC;GDAC", cancerStudy.getGroups()); Assert.assertEquals((Integer) 0 , cancerStudy.getStatus()); Assert.assertEquals(simpleDateFormat.parse("2011-12-18 13:17:17"), cancerStudy.getImportDate()); diff --git a/persistence/persistence-mybatis/src/test/resources/testSql.sql b/persistence/persistence-mybatis/src/test/resources/testSql.sql index 9d02a6db0ce..216740b2233 100644 --- a/persistence/persistence-mybatis/src/test/resources/testSql.sql +++ b/persistence/persistence-mybatis/src/test/resources/testSql.sql @@ -1,8 +1,11 @@ INSERT INTO type_of_cancer (TYPE_OF_CANCER_ID,NAME,CLINICAL_TRIAL_KEYWORDS,DEDICATED_COLOR,SHORT_NAME,PARENT) VALUES ('brca','Breast Invasive Carcinoma','breast,breast invasive','HotPink','Breast','tissue'); INSERT INTO type_of_cancer (TYPE_OF_CANCER_ID,NAME,CLINICAL_TRIAL_KEYWORDS,DEDICATED_COLOR,SHORT_NAME,PARENT) VALUES ('acc','Adrenocortical Carcinoma','adrenocortical carcinoma','Purple','ACC','adrenal_gland'); -INSERT INTO cancer_study (CANCER_STUDY_ID,CANCER_STUDY_IDENTIFIER,TYPE_OF_CANCER_ID,NAME,SHORT_NAME,DESCRIPTION,PUBLIC,PMID,CITATION,GROUPS,STATUS,IMPORT_DATE) VALUES(1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897,26451490','TCGA, Nature 2012, ...','SU2C-PI3K;PUBLIC;GDAC',0,'2011-12-18 13:17:17+00:00'); -INSERT INTO cancer_study (CANCER_STUDY_ID,CANCER_STUDY_IDENTIFIER,TYPE_OF_CANCER_ID,NAME,SHORT_NAME,DESCRIPTION,PUBLIC,PMID,CITATION,GROUPS,STATUS,IMPORT_DATE) VALUES(2,'acc_tcga','acc','Adrenocortical Carcinoma (TCGA, Provisional)','ACC (TCGA)','TCGA Adrenocortical Carcinoma; raw data at the NCI.',1,'23000897','TCGA, Nature 2012','SU2C-PI3K;PUBLIC;GDAC',0,'2013-10-12 11:11:15+00:00'); +INSERT INTO `reference_genome` VALUES (1, 'human', 'hg19', 'GRCh37', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01 00:00:00'); +INSERT INTO `reference_genome` VALUES (2, 'human', 'hg38', 'GRCh38', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01 00:00:00'); + +INSERT INTO cancer_study (CANCER_STUDY_ID,CANCER_STUDY_IDENTIFIER,TYPE_OF_CANCER_ID,NAME,SHORT_NAME,DESCRIPTION,PUBLIC,PMID,CITATION,GROUPS,STATUS,IMPORT_DATE,REFERENCE_GENOME_ID) VALUES(1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897','TCGA, Nature 2012','SU2C-PI3K;PUBLIC;GDAC',0,'2011-12-18 13:17:17+00:00',1); +INSERT INTO cancer_study (CANCER_STUDY_ID,CANCER_STUDY_IDENTIFIER,TYPE_OF_CANCER_ID,NAME,SHORT_NAME,DESCRIPTION,PUBLIC,PMID,CITATION,GROUPS,STATUS,IMPORT_DATE,REFERENCE_GENOME_ID) VALUES(2,'acc_tcga','acc','Adrenocortical Carcinoma (TCGA, Provisional)','ACC (TCGA)','TCGA Adrenocortical Carcinoma; raw data at the NCI.',1,'23000897','TCGA, Nature 2012','SU2C-PI3K;PUBLIC;GDAC',0,'2013-10-12 11:11:15+00:00',1); INSERT INTO cancer_study_tags (CANCER_STUDY_ID,TAGS) VALUES(1,'{"Analyst": {"Name": "Jack", "Email": "jack@something.com"}, "Load id": 35}'); INSERT INTO cancer_study_tags (CANCER_STUDY_ID,TAGS) VALUES(2,'{"Load id": 36}'); @@ -28,42 +31,37 @@ INSERT INTO genetic_entity (ID,ENTITY_TYPE) VALUES (18,'GENESET'); INSERT INTO genetic_entity (ID,ENTITY_TYPE) VALUES (19,'TREATMENT'); INSERT INTO genetic_entity (ID,ENTITY_TYPE) VALUES (20,'TREATMENT'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (207,'AKT1',1,'protein-coding','14q32.32'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (208,'AKT2',2,'protein-coding','19q13.1-q13.2'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (10000,'AKT3',3,'protein-coding','1q44'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (369,'ARAF',4,'protein-coding','Xp11.4-p11.2'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (472,'ATM',5,'protein-coding','11q22-q23'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (673,'BRAF',6,'protein-coding','7q34'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (672,'BRCA1',7,'protein-coding','17q21'); -INSERT INTO gene 
(ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (675,'BRCA2',8,'protein-coding','13q12.3'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (3265,'HRAS',9,'protein-coding','11p15.5'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (3845,'KRAS',10,'protein-coding','12p12.1'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (4893,'NRAS',11,'protein-coding','1p13.2'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (79501,'OR4F5',12,'protein-coding','1p36.33'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (148398,'SAMD11',13,'protein-coding','1p36.33'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (26155,'NOC2L',14,'protein-coding','1p36.33'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (2064,'ERBB2',15,'protein-coding','17q12'); -INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE,CYTOBAND) VALUES (2886,'GRB7',16,'protein-coding','17q12'); - -INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (3677745,'D45A',79501,1,'OR4F5 D45 missense'); -INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (426644,'G145C',79501,1,'OR4F5 G145 missense'); -INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (460103,'P23P',148398,1,'SAMD11 P23 silent'); -INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (4010395,'S146S',26155,1,'NOC2L S146 silent'); -INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (1290240,'M1T',26155,1,'NOC2L truncating'); -INSERT INTO cosmic_mutation 
(COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES (4010425,'Q197*',26155,1,'NOC2L truncating'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(207,'AKT1',1,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(208,'AKT2',2,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(10000,'AKT3',3,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(369,'ARAF',4,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(472,'ATM',5,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(673,'BRAF',6,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(672,'BRCA1',7,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(675,'BRCA2',8,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(3265,'HRAS',9,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(3845,'KRAS',10,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(4893,'NRAS',11,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(79501,'OR4F5',12,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(148398,'SAMD11',13,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(26155,'NOC2L',14,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(2064,'ERBB2',15,'protein-coding'); +INSERT INTO gene (ENTREZ_GENE_ID,HUGO_GENE_SYMBOL,GENETIC_ENTITY_ID,TYPE) VALUES(2886,'GRB7',16,'protein-coding'); +INSERT INTO cosmic_mutation 
(COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(3677745,'D45A',79501,1,'OR4F5 D45 missense'); +INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(426644,'G145C',79501,1,'OR4F5 G145 missense'); +INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(460103,'P23P',148398,1,'SAMD11 P23 silent'); +INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(4010395,'S146S',26155,1,'NOC2L S146 silent'); +INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(1290240,'M1T',26155,1,'NOC2L truncating'); +INSERT INTO cosmic_mutation (COSMIC_MUTATION_ID,PROTEIN_CHANGE,ENTREZ_GENE_ID,COUNT,KEYWORD) VALUES(4010425,'Q197*',26155,1,'NOC2L truncating'); INSERT INTO gene_alias (ENTREZ_GENE_ID,GENE_ALIAS) VALUES (207,'AKT alias'); INSERT INTO gene_alias (ENTREZ_GENE_ID,GENE_ALIAS) VALUES (207,'AKT alias2'); INSERT INTO gene_alias (ENTREZ_GENE_ID,GENE_ALIAS) VALUES (675,'BRCA1 alias'); -INSERT INTO `reference_genome` VALUES (1, 'human', 'hg19', 'GRCh37', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips', '2009-02-01 00:00:00'); -INSERT INTO `reference_genome` VALUES (2, 'human', 'hg38', 'GRCh38', NULL, 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips', '2013-12-01 00:00:00'); -INSERT INTO `reference_genome` VALUES (3, 'mouse', 'mm10', 'GRCm38', NULL, 'http://hgdownload.cse.ucsc.edu//goldenPath/mm10/bigZips', '2012-01-01 00:00:00'); - -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES (207,'14q32.33',10838,105235686,105262088,14,1); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES (207,'14q32.33',10838,104769349,104795751,14,2); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) 
VALUES (208,'19q13.2',0, 40736224, 40791443,19,1); -INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES (208,'19q13.2',0, 40230317, 40285536,19,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(207,'14q32.33',10838,105235686,105262088,14,1); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(207,'14q32.33',11162,104769349,104795751,14,2); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',15035, 40736224, 40791443,19,1); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID,CYTOBAND,EXONIC_LENGTH,START,END,CHR,REFERENCE_GENOME_ID) VALUES(208,'19q13.2',15035, 40230317, 40285536,19,2); INSERT INTO genetic_profile (GENETIC_PROFILE_ID,STABLE_ID,CANCER_STUDY_ID,GENETIC_ALTERATION_TYPE,DATATYPE,NAME,DESCRIPTION,SHOW_PROFILE_IN_ANALYSIS_TAB) VALUES (2,'study_tcga_pub_gistic',1,'COPY_NUMBER_ALTERATION','DISCRETE','Putative copy-number alterations from GISTIC','Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.',1); INSERT INTO genetic_profile (GENETIC_PROFILE_ID,STABLE_ID,CANCER_STUDY_ID,GENETIC_ALTERATION_TYPE,DATATYPE,NAME,DESCRIPTION,SHOW_PROFILE_IN_ANALYSIS_TAB) VALUES (3,'study_tcga_pub_mrna',1,'MRNA_EXPRESSION','Z-SCORE','mRNA expression (microarray)','Expression levels (Agilent microarray).',0); diff --git a/pom.xml b/pom.xml index 95a30a5c9d3..b0076df0955 100644 --- a/pom.xml +++ b/pom.xml @@ -595,7 +595,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.9 + 3.0.1 org.umlgraph.doclet.UmlGraphDoc @@ -615,7 +615,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 2.7 + 2.9 diff --git a/service/src/main/java/org/cbioportal/service/GeneService.java b/service/src/main/java/org/cbioportal/service/GeneService.java index 6b53f8dd2d6..e4578b6924e 100644 --- a/service/src/main/java/org/cbioportal/service/GeneService.java +++ b/service/src/main/java/org/cbioportal/service/GeneService.java @@ -14,6 +14,8 @@ List getAllGenes(String keyword, String alias, String projection, Integer BaseMeta getMetaGenes(String keyword, String alias); Gene getGene(String geneId) throws GeneNotFoundException; + + Gene getGeneByGeneticEntityId(Integer geneticEntityId) throws GeneNotFoundException; List getAliasesOfGene(String geneId) throws GeneNotFoundException; diff --git a/service/src/main/java/org/cbioportal/service/ReferenceGenomeGeneService.java b/service/src/main/java/org/cbioportal/service/ReferenceGenomeGeneService.java new file mode 100644 index 00000000000..1fb09b586c9 --- /dev/null +++ b/service/src/main/java/org/cbioportal/service/ReferenceGenomeGeneService.java @@ -0,0 +1,12 @@ +package org.cbioportal.service; + +import org.cbioportal.model.ReferenceGenomeGene; +import java.util.List; + +public interface ReferenceGenomeGeneService { + List fetchAllReferenceGenomeGenes(String genomeName); + List fetchGenesByGenomeName(List geneIds, String 
genomeName); + List fetchGenesByHugoGeneSymbolsAndGenomeName(List geneIds, String genomeName); + ReferenceGenomeGene getReferenceGenomeGene(Integer geneID, String genomeName); + ReferenceGenomeGene getReferenceGenomeGeneByEntityId(Integer entityId, String genomeName); +} diff --git a/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java index 4c2e001cfe8..7d10e5e2116 100644 --- a/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java @@ -12,11 +12,13 @@ import org.cbioportal.model.MolecularData; import org.cbioportal.model.MolecularProfile; import org.cbioportal.model.Sample; +import org.cbioportal.model.ReferenceGenomeGene; import org.cbioportal.model.CoExpression.GeneticEntityType; import org.cbioportal.persistence.MolecularDataRepository; import org.cbioportal.persistence.SampleListRepository; import org.cbioportal.model.CoExpression; import org.cbioportal.service.GeneService; +import org.cbioportal.service.ReferenceGenomeGeneService; import org.cbioportal.service.GenesetDataService; import org.cbioportal.service.GenesetService; import org.cbioportal.service.MolecularDataService; @@ -51,6 +53,8 @@ public class CoExpressionServiceImpl implements CoExpressionService { @Autowired private GeneService geneService; @Autowired + private ReferenceGenomeGeneService referenceGenomeGeneService; + @Autowired private GenesetService genesetService; @Autowired private GenesetDataService genesetDataService; @@ -105,7 +109,7 @@ public List getCoExpressions(String geneticEntityId, CoExpression. 
.filter(p -> sharedSamples.contains(p.getSampleId())).collect(Collectors.toList()); computedCoExpressions = computeCoExpressionsFromMolecularData(finalmolecularDataListB, isMolecularProfileBOfGenesetType, - finalmolecularDataListA, geneticEntityId, threshold); + finalmolecularDataListA, geneticEntityId, threshold, molecularProfileIdB); return computedCoExpressions; } @@ -252,7 +256,7 @@ public List fetchCoExpressions(String geneticEntityId, .collect(Collectors.toList()); } computedCoExpressions = computeCoExpressionsFromMolecularData(molecularDataListB, isMolecularProfileBOfGenesetType, - molecularDataListA, geneticEntityId, threshold); + molecularDataListA, geneticEntityId, threshold, molecularProfileIdB); return computedCoExpressions; } @@ -319,7 +323,15 @@ private CoExpression computeCoExpressions(String entityId, List valuesA, coExpression.setGeneticEntityName(geneset.getName()); } else { Gene gene = geneService.getGene(entityId); - coExpression.setCytoband(gene.getCytoband()); + try { + MolecularProfile molecularProfile = molecularProfileService.getMolecularProfile(molecularProfileId); + ReferenceGenomeGene refGene = referenceGenomeGeneService.getReferenceGenomeGene( + gene.getEntrezGeneId(), + molecularProfile.getCancerStudy().getReferenceGenome()); + coExpression.setCytoband(refGene.getCytoband()); //value will be set by the frontend + } catch (NullPointerException e) { + coExpression.setCytoband("-"); + } coExpression.setGeneticEntityName(gene.getHugoGeneSymbol()); } diff --git a/service/src/main/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImpl.java index a2d51af37df..a623139eaa9 100644 --- a/service/src/main/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImpl.java @@ -9,8 +9,10 @@ import org.cbioportal.model.GeneMolecularData; import 
org.cbioportal.model.MolecularProfile; import org.cbioportal.model.Sample; +import org.cbioportal.model.ReferenceGenomeGene; import org.cbioportal.service.ExpressionEnrichmentService; import org.cbioportal.service.GeneService; +import org.cbioportal.service.ReferenceGenomeGeneService; import org.cbioportal.service.MolecularDataService; import org.cbioportal.service.MolecularProfileService; import org.cbioportal.service.SampleService; @@ -20,7 +22,6 @@ import java.math.BigDecimal; import java.util.ArrayList; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -39,6 +40,8 @@ public class ExpressionEnrichmentServiceImpl implements ExpressionEnrichmentServ private MolecularDataService molecularDataService; @Autowired private GeneService geneService; + @Autowired + private ReferenceGenomeGeneService refGeneService; @Override public List getExpressionEnrichments(String molecularProfileId, List alteredIds, @@ -95,7 +98,13 @@ public List getExpressionEnrichments(String molecularProfi ExpressionEnrichment expressionEnrichment = new ExpressionEnrichment(); expressionEnrichment.setEntrezGeneId(entrezGeneId); Gene gene = genes.get(entrezGeneId).get(0); - expressionEnrichment.setCytoband(gene.getCytoband()); + try { + ReferenceGenomeGene refGene = refGeneService.getReferenceGenomeGene(gene.getEntrezGeneId(), + molecularProfileService.getMolecularProfile(molecularProfileId).getCancerStudy().getReferenceGenome()); + expressionEnrichment.setCytoband(refGene.getCytoband()); + } catch (NullPointerException e) { + expressionEnrichment.setCytoband("-"); + } expressionEnrichment.setHugoGeneSymbol(gene.getHugoGeneSymbol()); expressionEnrichment.setMeanExpressionInAlteredGroup(BigDecimal.valueOf(alteredMean)); expressionEnrichment.setMeanExpressionInUnalteredGroup(BigDecimal.valueOf(unalteredMean)); diff --git a/service/src/main/java/org/cbioportal/service/impl/GeneServiceImpl.java 
b/service/src/main/java/org/cbioportal/service/impl/GeneServiceImpl.java index 262e01584fa..b261c34d461 100644 --- a/service/src/main/java/org/cbioportal/service/impl/GeneServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/GeneServiceImpl.java @@ -59,7 +59,7 @@ public List getAllGenes(String keyword, String alias, String projection, I } } - geneList.forEach(gene -> chromosomeCalculator.setChromosome(gene)); + //geneList.forEach(gene -> chromosomeCalculator.setChromosome(gene)); return geneList; } @@ -76,6 +76,15 @@ public BaseMeta getMetaGenes(String keyword, String alias) { } } + @Override + public Gene getGeneByGeneticEntityId(Integer geneticEntityId) throws GeneNotFoundException { + + Gene gene; + gene = geneRepository.getGeneByGeneticEntityId(geneticEntityId); + if (gene == null) throw new GeneNotFoundException(Integer.toString(geneticEntityId)); + return gene; + } + @Override public Gene getGene(String geneId) throws GeneNotFoundException { @@ -91,7 +100,7 @@ public Gene getGene(String geneId) throws GeneNotFoundException { throw new GeneNotFoundException(geneId); } - chromosomeCalculator.setChromosome(gene); + //chromosomeCalculator.setChromosome(gene); return gene; } @@ -119,7 +128,7 @@ public List fetchGenes(List geneIds, String geneIdType, String pro geneList = geneRepository.fetchGenesByHugoGeneSymbols(geneIds, projection); } - geneList.forEach(gene -> chromosomeCalculator.setChromosome(gene)); + //geneList.forEach(gene -> chromosomeCalculator.setChromosome(gene)); return geneList; } diff --git a/service/src/main/java/org/cbioportal/service/impl/MutationServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/MutationServiceImpl.java index a93fb796613..5970e89f753 100644 --- a/service/src/main/java/org/cbioportal/service/impl/MutationServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/MutationServiceImpl.java @@ -42,7 +42,7 @@ public List getMutationsInMolecularProfileBySampleListId(String molecu List 
mutationList = mutationRepository.getMutationsInMolecularProfileBySampleListId(molecularProfileId, sampleListId, entrezGeneIds, snpOnly, projection, pageSize, pageNumber, sortBy, direction); - mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); + //mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); return mutationList; } @@ -66,7 +66,7 @@ public List getMutationsInMultipleMolecularProfiles(List molec List mutationList = mutationRepository.getMutationsInMultipleMolecularProfiles(molecularProfileIds, sampleIds, entrezGeneIds, projection, pageSize, pageNumber, sortBy, direction); - mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); + //mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); return mutationList; } @@ -91,7 +91,7 @@ public List fetchMutationsInMolecularProfile(String molecularProfileId List mutationList = mutationRepository.fetchMutationsInMolecularProfile(molecularProfileId, sampleIds, entrezGeneIds, snpOnly, projection, pageSize, pageNumber, sortBy, direction); - mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); + //mutationList.forEach(mutation -> chromosomeCalculator.setChromosome(mutation.getGene())); return mutationList; } diff --git a/service/src/main/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImpl.java new file mode 100644 index 00000000000..4c12f42c659 --- /dev/null +++ b/service/src/main/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImpl.java @@ -0,0 +1,46 @@ +package org.cbioportal.service.impl; + + +import org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.persistence.ReferenceGenomeGeneRepository; +import org.cbioportal.service.ReferenceGenomeGeneService; +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.stereotype.Service; + +import java.util.List; +import java.util.stream.Collectors; + +@Service +public class ReferenceGenomeGeneServiceImpl implements ReferenceGenomeGeneService { + + @Autowired + private ReferenceGenomeGeneRepository referenceGenomeGeneRepository; + + @Override + public List fetchAllReferenceGenomeGenes(String genomeName) { + + return referenceGenomeGeneRepository.getAllGenesByGenomeName(genomeName); + } + + @Override + public List fetchGenesByGenomeName(List geneIds, String genomeName) { + return referenceGenomeGeneRepository.getGenesByGenomeName(geneIds, genomeName); + } + + @Override + public List fetchGenesByHugoGeneSymbolsAndGenomeName(List geneIds, String genomeName) { + return referenceGenomeGeneRepository.getGenesByHugoGeneSymbolsAndGenomeName(geneIds, genomeName); + } + + @Override + public ReferenceGenomeGene getReferenceGenomeGene(Integer geneId, String genomeName) { + + return referenceGenomeGeneRepository.getReferenceGenomeGene(geneId, genomeName); + } + + @Override + public ReferenceGenomeGene getReferenceGenomeGeneByEntityId(Integer entityId, String genomeName) { + + return referenceGenomeGeneRepository.getReferenceGenomeGeneByEntityId(entityId, genomeName); + } +} diff --git a/service/src/main/java/org/cbioportal/service/util/AlterationEnrichmentUtil.java b/service/src/main/java/org/cbioportal/service/util/AlterationEnrichmentUtil.java index 5ab5a052ffc..2ef8900f151 100644 --- a/service/src/main/java/org/cbioportal/service/util/AlterationEnrichmentUtil.java +++ b/service/src/main/java/org/cbioportal/service/util/AlterationEnrichmentUtil.java @@ -62,7 +62,7 @@ public List createAlterationEnrichments( AlterationEnrichment alterationEnrichment = new AlterationEnrichment(); alterationEnrichment.setEntrezGeneId(gene.getEntrezGeneId()); alterationEnrichment.setHugoGeneSymbol(gene.getHugoGeneSymbol()); - alterationEnrichment.setCytoband(gene.getCytoband()); + //alterationEnrichment.setCytoband(gene.getCytoband()); List 
counts = groups .stream() diff --git a/service/src/main/java/org/cbioportal/service/util/ChromosomeCalculator.java b/service/src/main/java/org/cbioportal/service/util/ChromosomeCalculator.java index 2dc2dad739e..0a7cff882dc 100644 --- a/service/src/main/java/org/cbioportal/service/util/ChromosomeCalculator.java +++ b/service/src/main/java/org/cbioportal/service/util/ChromosomeCalculator.java @@ -1,6 +1,7 @@ package org.cbioportal.service.util; import org.cbioportal.model.Gene; +import org.cbioportal.model.ReferenceGenomeGene; import org.springframework.stereotype.Component; import java.util.regex.Matcher; @@ -30,11 +31,4 @@ private String getChromosome(String cytoband) { return null; } - - public void setChromosome(Gene gene) { - - if (gene != null) { - gene.setChromosome(getChromosome(gene.getCytoband())); - } - } } diff --git a/service/src/test/java/org/cbioportal/service/impl/BaseServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/BaseServiceImplTest.java index 2cc67896511..110f6319170 100644 --- a/service/src/test/java/org/cbioportal/service/impl/BaseServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/BaseServiceImplTest.java @@ -24,6 +24,13 @@ public class BaseServiceImplTest { public static final String CLINICAL_DATA_TYPE = "clinical_data_type"; public static final Integer ENTREZ_GENE_ID_1 = 1; public static final Integer ENTREZ_GENE_ID_2 = 2; + public static final Integer ENTREZ_GENE_ID_3 = 3; + public static final Integer ENTREZ_GENE_ID_4 = 4; + public static final Integer GENETIC_ENTITY_ID_1 = 1; + public static final Integer GENETIC_ENTITY_ID_2 = 2; + public static final Integer GENETIC_ENTITY_ID_3 = 3; + public static final Integer GENETIC_ENTITY_ID_4 = 4; + public static final Integer REFERENCE_GENOME_ID = 1; public static final String GENESET_ID1 = "geneset_id1"; public static final String GENESET_ID2 = "geneset_id2"; public static final String HUGO_GENE_SYMBOL = "hugo_gene_symbol"; diff --git 
a/service/src/test/java/org/cbioportal/service/impl/CoExpressionServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/CoExpressionServiceImplTest.java index 32877818221..7d1d7a9742f 100644 --- a/service/src/test/java/org/cbioportal/service/impl/CoExpressionServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/CoExpressionServiceImplTest.java @@ -125,13 +125,13 @@ public void fetchGeneCoExpressions() throws Exception { CoExpression coExpression1 = result.get(0); Assert.assertEquals("2", coExpression1.getGeneticEntityId()); Assert.assertEquals("HUGO2", coExpression1.getGeneticEntityName()); - Assert.assertEquals("CYTOBAND2", coExpression1.getCytoband()); + Assert.assertEquals("-", coExpression1.getCytoband()); Assert.assertEquals(new BigDecimal("0.5"), coExpression1.getSpearmansCorrelation()); Assert.assertEquals(new BigDecimal("0.6666666666666667"), coExpression1.getpValue()); CoExpression coExpression2 = result.get(1); Assert.assertEquals("3", coExpression2.getGeneticEntityId()); Assert.assertEquals("HUGO3", coExpression2.getGeneticEntityName()); - Assert.assertEquals("CYTOBAND3", coExpression2.getCytoband()); + Assert.assertEquals("-", coExpression2.getCytoband()); Assert.assertEquals(new BigDecimal("0.8660254037844386"), coExpression2.getSpearmansCorrelation()); Assert.assertEquals(new BigDecimal("0.3333333333333333"), coExpression2.getpValue()); } @@ -287,17 +287,17 @@ private List createGenes() { Gene gene1 = new Gene(); gene1.setEntrezGeneId(2); gene1.setHugoGeneSymbol("HUGO2"); - gene1.setCytoband("CYTOBAND2"); + gene1.setGeneticEntityId(GENETIC_ENTITY_ID_2); genes.add(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(3); gene2.setHugoGeneSymbol("HUGO3"); - gene2.setCytoband("CYTOBAND3"); + gene2.setGeneticEntityId(GENETIC_ENTITY_ID_3); genes.add(gene2); Gene gene3 = new Gene(); gene3.setEntrezGeneId(4); gene3.setHugoGeneSymbol("HUGO4"); - gene3.setCytoband("CYTOBAND4"); + 
gene3.setGeneticEntityId(GENETIC_ENTITY_ID_4); genes.add(gene3); return genes; } diff --git a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java index 2a33d136730..47ff2a3c01e 100644 --- a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java @@ -1,11 +1,6 @@ package org.cbioportal.service.impl; -import org.cbioportal.model.CopyNumberCount; -import org.cbioportal.model.CopyNumberCountByGene; -import org.cbioportal.model.DiscreteCopyNumberData; -import org.cbioportal.model.Gene; -import org.cbioportal.model.GeneMolecularData; -import org.cbioportal.model.MolecularProfile; +import org.cbioportal.model.*; import org.cbioportal.model.meta.BaseMeta; import org.cbioportal.persistence.DiscreteCopyNumberRepository; import org.cbioportal.service.MolecularDataService; diff --git a/service/src/test/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImplTest.java index 36bd66d7755..0b1cf137318 100644 --- a/service/src/test/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/ExpressionEnrichmentServiceImplTest.java @@ -83,12 +83,12 @@ public void getExpressionEnrichments() throws Exception { Gene gene1 = new Gene(); gene1.setEntrezGeneId(2); gene1.setHugoGeneSymbol("HUGO2"); - gene1.setCytoband("CYTOBAND2"); + gene1.setGeneticEntityId(GENETIC_ENTITY_ID_2); genes.add(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(3); gene2.setHugoGeneSymbol("HUGO3"); - gene2.setCytoband("CYTOBAND3"); + gene2.setGeneticEntityId(GENETIC_ENTITY_ID_3); genes.add(gene2); Mockito.when(geneService.fetchGenes(Arrays.asList("2", "3"), "ENTREZ_GENE_ID", 
"SUMMARY")).thenReturn(genes); @@ -100,7 +100,7 @@ public void getExpressionEnrichments() throws Exception { ExpressionEnrichment expressionEnrichment1 = result.get(0); Assert.assertEquals((Integer) 2, expressionEnrichment1.getEntrezGeneId()); Assert.assertEquals("HUGO2", expressionEnrichment1.getHugoGeneSymbol()); - Assert.assertEquals("CYTOBAND2", expressionEnrichment1.getCytoband()); + Assert.assertEquals("-", expressionEnrichment1.getCytoband()); Assert.assertEquals(new BigDecimal("2.5"), expressionEnrichment1.getMeanExpressionInAlteredGroup()); Assert.assertEquals(new BigDecimal("2.55"), expressionEnrichment1.getMeanExpressionInUnalteredGroup()); Assert.assertEquals(new BigDecimal("0.7071067811865476"), @@ -111,7 +111,7 @@ public void getExpressionEnrichments() throws Exception { ExpressionEnrichment expressionEnrichment2 = result.get(1); Assert.assertEquals((Integer) 3, expressionEnrichment2.getEntrezGeneId()); Assert.assertEquals("HUGO3", expressionEnrichment2.getHugoGeneSymbol()); - Assert.assertEquals("CYTOBAND3", expressionEnrichment2.getCytoband()); + Assert.assertEquals("-", expressionEnrichment2.getCytoband()); Assert.assertEquals(new BigDecimal("3.05"), expressionEnrichment2.getMeanExpressionInAlteredGroup()); Assert.assertEquals(new BigDecimal("2.65"), expressionEnrichment2.getMeanExpressionInUnalteredGroup()); Assert.assertEquals(new BigDecimal("2.7577164466275352"), diff --git a/service/src/test/java/org/cbioportal/service/impl/GeneServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/GeneServiceImplTest.java index f5adbe49579..100f7d2334a 100644 --- a/service/src/test/java/org/cbioportal/service/impl/GeneServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/GeneServiceImplTest.java @@ -37,15 +37,10 @@ public void getAllGenes() throws Exception { Mockito.when(geneRepository.getAllGenes(KEYWORD, ALIAS, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION)) .thenReturn(expectedGeneList); - 
Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("19"); - return null; - }).when(chromosomeCalculator).setChromosome(gene); List result = geneService.getAllGenes(KEYWORD, ALIAS, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION); Assert.assertEquals(expectedGeneList, result); - Assert.assertEquals("19", result.get(0).getChromosome()); } @Test @@ -72,15 +67,10 @@ public void getGeneByEntrezGeneId() throws Exception { Gene expectedGene = new Gene(); Mockito.when(geneRepository.getGeneByEntrezGeneId(ENTREZ_GENE_ID_1)).thenReturn(expectedGene); - Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("X"); - return null; - }).when(chromosomeCalculator).setChromosome(expectedGene); Gene result = geneService.getGene(ENTREZ_GENE_ID_1.toString()); Assert.assertEquals(expectedGene, result); - Assert.assertEquals("X", result.getChromosome()); } @Test(expected = GeneNotFoundException.class) @@ -96,15 +86,10 @@ public void getGeneByHugoGeneSymbol() throws Exception { Gene expectedGene = new Gene(); Mockito.when(geneRepository.getGeneByHugoGeneSymbol(HUGO_GENE_SYMBOL)).thenReturn(expectedGene); - Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("Y"); - return null; - }).when(chromosomeCalculator).setChromosome(expectedGene); Gene result = geneService.getGene(HUGO_GENE_SYMBOL); Assert.assertEquals(expectedGene, result); - Assert.assertEquals("Y", result.getChromosome()); } @Test @@ -157,10 +142,6 @@ public void fetchGenes() throws Exception { List geneIds = new ArrayList<>(); geneIds.add(HUGO_GENE_SYMBOL); - Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("12"); - return null; - }).when(chromosomeCalculator).setChromosome(gene); Mockito.when(geneRepository.fetchGenesByHugoGeneSymbols(Arrays.asList(HUGO_GENE_SYMBOL), PROJECTION)) .thenReturn(expectedGeneList); @@ -169,7 +150,7 @@ public 
void fetchGenes() throws Exception { Assert.assertEquals(1, result.size()); Assert.assertEquals(gene, result.get(0)); - Assert.assertEquals("12", result.get(0).getChromosome()); + } @Test diff --git a/service/src/test/java/org/cbioportal/service/impl/MutationServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/MutationServiceImplTest.java index 40bca02f20e..b5647961bb6 100644 --- a/service/src/test/java/org/cbioportal/service/impl/MutationServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/MutationServiceImplTest.java @@ -47,16 +47,17 @@ public void getMutationsInMolecularProfileBySampleListId() throws Exception { Mockito.when(mutationRepository.getMutationsInMolecularProfileBySampleListId(MOLECULAR_PROFILE_ID, SAMPLE_LIST_ID, Arrays.asList(ENTREZ_GENE_ID_1), null, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION)) .thenReturn(expectedMutationList); - Mockito.doAnswer(invocationOnMock -> { + //TODO: varify chromosome from reference genome gene + /*Mockito.doAnswer(invocationOnMock -> { ((Gene) invocationOnMock.getArguments()[0]).setChromosome("19"); return null; - }).when(chromosomeCalculator).setChromosome(gene); + }).when(chromosomeCalculator).setChromosome(gene);*/ List result = mutationService.getMutationsInMolecularProfileBySampleListId(MOLECULAR_PROFILE_ID, SAMPLE_LIST_ID, Arrays.asList(ENTREZ_GENE_ID_1), null, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION); Assert.assertEquals(expectedMutationList, result); - Assert.assertEquals("19", result.get(0).getGene().getChromosome()); + //Assert.assertEquals("19", result.get(0).getGene().getChromosome()); } @Test(expected = MolecularProfileNotFoundException.class) @@ -105,17 +106,12 @@ public void getMutationsInMultipleMolecularProfiles() throws Exception { Mockito.when(mutationRepository.getMutationsInMultipleMolecularProfiles(Arrays.asList(MOLECULAR_PROFILE_ID), Arrays.asList(SAMPLE_ID1), Arrays.asList(ENTREZ_GENE_ID_1), PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, 
DIRECTION)).thenReturn(expectedMutationList); - Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("19"); - return null; - }).when(chromosomeCalculator).setChromosome(gene); List result = mutationService.getMutationsInMultipleMolecularProfiles( Arrays.asList(MOLECULAR_PROFILE_ID), Arrays.asList(SAMPLE_ID1), Arrays.asList(ENTREZ_GENE_ID_1), PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION); Assert.assertEquals(expectedMutationList, result); - Assert.assertEquals("19", result.get(0).getGene().getChromosome()); } @Test @@ -146,17 +142,12 @@ public void fetchMutationsInMolecularProfile() throws Exception { Mockito.when(mutationRepository.fetchMutationsInMolecularProfile(MOLECULAR_PROFILE_ID, Arrays.asList(SAMPLE_ID1), Arrays.asList(ENTREZ_GENE_ID_1), null, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION)).thenReturn(expectedMutationList); - Mockito.doAnswer(invocationOnMock -> { - ((Gene) invocationOnMock.getArguments()[0]).setChromosome("19"); - return null; - }).when(chromosomeCalculator).setChromosome(gene); List result = mutationService.fetchMutationsInMolecularProfile(MOLECULAR_PROFILE_ID, Arrays.asList(SAMPLE_ID1), Arrays.asList(ENTREZ_GENE_ID_1), null, PROJECTION, PAGE_SIZE, PAGE_NUMBER, SORT, DIRECTION); Assert.assertEquals(expectedMutationList, result); - Assert.assertEquals("19", result.get(0).getGene().getChromosome()); } @Test(expected = MolecularProfileNotFoundException.class) diff --git a/service/src/test/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImplTest.java new file mode 100644 index 00000000000..b98ab96f29a --- /dev/null +++ b/service/src/test/java/org/cbioportal/service/impl/ReferenceGenomeGeneServiceImplTest.java @@ -0,0 +1,104 @@ +package org.cbioportal.service.impl; + +import org.cbioportal.model.Gene; +import org.cbioportal.model.ReferenceGenome; +import 
org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.model.meta.BaseMeta; +import org.cbioportal.persistence.ReferenceGenomeGeneRepository; +import org.cbioportal.service.exception.GeneNotFoundException; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.runners.MockitoJUnitRunner; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +@RunWith(MockitoJUnitRunner.class) +public class ReferenceGenomeGeneServiceImplTest extends BaseServiceImplTest { + + @InjectMocks + private ReferenceGenomeGeneServiceImpl geneService; + + @Mock + private ReferenceGenomeGeneRepository geneRepository; + + @Test + public void getAllGenesByGenomeName() throws Exception { + + List expectedGeneList = new ArrayList<>(); + ReferenceGenomeGene gene = new ReferenceGenomeGene(); + gene.setEntrezGeneId(ENTREZ_GENE_ID_2); + gene.setReferenceGenomeId(REFERENCE_GENOME_ID); + expectedGeneList.add(gene); + + Mockito.when(geneRepository.getAllGenesByGenomeName(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME)) + .thenReturn(expectedGeneList); + + List result = geneService.fetchAllReferenceGenomeGenes(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + Assert.assertEquals(expectedGeneList, result); + } + + @Test + public void getGenesByGenomeName() throws Exception { + + List expectedGeneList = new ArrayList<>(); + ReferenceGenomeGene gene1 = new ReferenceGenomeGene(); + gene1.setEntrezGeneId(ENTREZ_GENE_ID_1); + gene1.setReferenceGenomeId(REFERENCE_GENOME_ID); + expectedGeneList.add(gene1); + ReferenceGenomeGene gene2 = new ReferenceGenomeGene(); + gene2.setEntrezGeneId(ENTREZ_GENE_ID_2); + gene2.setReferenceGenomeId(REFERENCE_GENOME_ID); + expectedGeneList.add(gene2); + List geneIds = new ArrayList<>(); + geneIds.add(ENTREZ_GENE_ID_1); + geneIds.add(ENTREZ_GENE_ID_2); + 
Mockito.when(geneRepository.getGenesByGenomeName(geneIds,ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME)) + .thenReturn(expectedGeneList); + + List result = geneService.fetchGenesByGenomeName(geneIds,ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + Assert.assertEquals(expectedGeneList, result); + } + + @Test + public void getReferenceGenomeGene() throws Exception { + + Gene gene = new Gene(); + gene.setEntrezGeneId(ENTREZ_GENE_ID_1); + gene.setHugoGeneSymbol("HUGO2"); + gene.setGeneticEntityId(GENETIC_ENTITY_ID_1); + ReferenceGenomeGene expectedGene = new ReferenceGenomeGene(); + expectedGene.setEntrezGeneId(ENTREZ_GENE_ID_1); + expectedGene.setReferenceGenomeId(REFERENCE_GENOME_ID); + Mockito.when(geneRepository.getReferenceGenomeGene(gene.getEntrezGeneId(), + ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME)).thenReturn(expectedGene); + + ReferenceGenomeGene result = geneService.getReferenceGenomeGene(gene.getEntrezGeneId(), + ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + Assert.assertEquals(expectedGene, result); + } + + @Test + public void getReferenceGenomeGeneByEntityId() throws Exception { + + ReferenceGenomeGene expectedGene = new ReferenceGenomeGene(); + expectedGene.setEntrezGeneId(ENTREZ_GENE_ID_1); + expectedGene.setReferenceGenomeId(REFERENCE_GENOME_ID); + Mockito.when(geneRepository.getReferenceGenomeGeneByEntityId(GENETIC_ENTITY_ID_1, + ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME)).thenReturn(expectedGene); + + ReferenceGenomeGene result = geneService.getReferenceGenomeGeneByEntityId(GENETIC_ENTITY_ID_1, + ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + + Assert.assertEquals(expectedGene, result); + } + +} diff --git a/service/src/test/java/org/cbioportal/service/util/AlterationEnrichmentUtilTest.java b/service/src/test/java/org/cbioportal/service/util/AlterationEnrichmentUtilTest.java index 13b1d5b33eb..b7d6a24b248 100644 --- a/service/src/test/java/org/cbioportal/service/util/AlterationEnrichmentUtilTest.java +++ 
b/service/src/test/java/org/cbioportal/service/util/AlterationEnrichmentUtilTest.java @@ -12,6 +12,7 @@ import org.cbioportal.model.Gene; import org.cbioportal.model.MolecularProfileCaseIdentifier; import org.cbioportal.service.GeneService; +import org.cbioportal.service.impl.BaseServiceImplTest; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -80,12 +81,12 @@ public void createAlterationEnrichments() throws Exception { Gene gene1 = new Gene(); gene1.setEntrezGeneId(2); gene1.setHugoGeneSymbol("HUGO2"); - gene1.setCytoband("CYTOBAND2"); + gene1.setGeneticEntityId(2); genes.add(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(3); gene2.setHugoGeneSymbol("HUGO3"); - gene2.setCytoband("CYTOBAND3"); + gene2.setGeneticEntityId(3); genes.add(gene2); Mockito.when(geneService.fetchGenes(Arrays.asList("2", "3"), "ENTREZ_GENE_ID", "SUMMARY")).thenReturn(genes); @@ -106,7 +107,7 @@ public void createAlterationEnrichments() throws Exception { AlterationEnrichment alterationEnrichment1 = result.get(0); Assert.assertEquals((Integer) 2, alterationEnrichment1.getEntrezGeneId()); Assert.assertEquals("HUGO2", alterationEnrichment1.getHugoGeneSymbol()); - Assert.assertEquals("CYTOBAND2", alterationEnrichment1.getCytoband()); + Assert.assertEquals(null, alterationEnrichment1.getCytoband()); Assert.assertEquals(2, alterationEnrichment1.getCounts().size()); Assert.assertEquals(new BigDecimal("1.0"), alterationEnrichment1.getpValue()); alterationEnrichment1.getCounts().forEach(countSummary -> { @@ -120,7 +121,7 @@ public void createAlterationEnrichments() throws Exception { AlterationEnrichment alterationEnrichment2 = result.get(1); Assert.assertEquals((Integer) 3, alterationEnrichment2.getEntrezGeneId()); Assert.assertEquals("HUGO3", alterationEnrichment2.getHugoGeneSymbol()); - Assert.assertEquals("CYTOBAND3", alterationEnrichment2.getCytoband()); + Assert.assertEquals(null, alterationEnrichment2.getCytoband()); Assert.assertEquals(2, 
alterationEnrichment2.getCounts().size()); Assert.assertEquals(new BigDecimal("0.3"), alterationEnrichment2.getpValue()); alterationEnrichment2.getCounts().forEach(countSummary -> { diff --git a/web/src/main/java/org/cbioportal/web/ReferenceGenomeGeneController.java b/web/src/main/java/org/cbioportal/web/ReferenceGenomeGeneController.java new file mode 100644 index 00000000000..6a45b92cc9e --- /dev/null +++ b/web/src/main/java/org/cbioportal/web/ReferenceGenomeGeneController.java @@ -0,0 +1,98 @@ +package org.cbioportal.web; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import io.swagger.annotations.ApiParam; +import org.cbioportal.model.Gene; +import org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.service.ReferenceGenomeGeneService; +import org.cbioportal.service.exception.GeneNotFoundException; +import org.cbioportal.web.config.annotation.PublicApi; +import org.cbioportal.web.parameter.Direction; +import org.cbioportal.web.parameter.GeneIdType; +import org.cbioportal.web.parameter.HeaderKeyConstants; +import org.cbioportal.web.parameter.PagingConstants; +import org.cbioportal.web.parameter.Projection; +import org.cbioportal.web.parameter.sort.GeneSortBy; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +import javax.validation.constraints.Max; +import 
javax.validation.constraints.Min; +import javax.validation.constraints.Size; +import java.util.List; +import java.util.Arrays; +import java.util.stream.Collectors; + +@PublicApi +@RestController +@Validated +@Api(tags = "Reference Genome Genes", description = " ") +public class ReferenceGenomeGeneController { + + private static final int GENE_MAX_PAGE_SIZE = 100000; + private static final String GENE_DEFAULT_PAGE_SIZE = "100000"; + + @Autowired + private ReferenceGenomeGeneService referenceGenomeGeneService; + + @RequestMapping(value = "/reference-genome-genes/{genomeName}", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) + @ApiOperation("Get all reference genes") + public ResponseEntity> getAllRefGenes( + @ApiParam(required = true, value = "Name of Reference Genome hg19") + @PathVariable String genomeName) { + + return new ResponseEntity<>( + referenceGenomeGeneService.fetchAllReferenceGenomeGenes(genomeName), HttpStatus.OK); + } + + @RequestMapping(value = "/reference-genome-genes/{genomeName}/{geneId}", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) + @ApiOperation("Get a gene of a reference genome of interest") + public ResponseEntity getGene( + @ApiParam(required = true, value = "Name of Reference Genome hg19") + @PathVariable String genomeName, + @ApiParam(required = true, value = "Entrez Gene ID 207") + @PathVariable Integer geneId) throws GeneNotFoundException { + + return new ResponseEntity<>(referenceGenomeGeneService.getReferenceGenomeGene(geneId, genomeName), HttpStatus.OK); + } + + @RequestMapping(value = "/reference-genome-genes/{genomeName}/fetch", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + @ApiOperation("Fetch genes of reference genome of interest") + public ResponseEntity> fetchGenes( + @ApiParam(required = true, value = "Name of Reference Genome hg19") + @PathVariable String genomeName, + @ApiParam(required = true, 
value = "List of Entrez Gene IDs") + @Size(min = 1, max = GENE_MAX_PAGE_SIZE) + @RequestBody List geneIds) { + + if (isInteger(geneIds.get(0))) { + List newIds = geneIds.stream().map(s -> Integer.parseInt(s)).collect(Collectors.toList()); + return new ResponseEntity<>(referenceGenomeGeneService.fetchGenesByGenomeName(newIds, genomeName), HttpStatus.OK); + } else { + return new ResponseEntity<>(referenceGenomeGeneService.fetchGenesByHugoGeneSymbolsAndGenomeName( + geneIds, genomeName), HttpStatus.OK); + } + } + + private boolean isInteger(String s) { + try { + Integer.parseInt(s); + } catch(NumberFormatException | NullPointerException e) { + return false; + } + return true; + } +} + diff --git a/web/src/main/java/org/cbioportal/web/util/ClinicalDataEnrichmentUtil.java b/web/src/main/java/org/cbioportal/web/util/ClinicalDataEnrichmentUtil.java index 8e6083d2836..844378d3634 100644 --- a/web/src/main/java/org/cbioportal/web/util/ClinicalDataEnrichmentUtil.java +++ b/web/src/main/java/org/cbioportal/web/util/ClinicalDataEnrichmentUtil.java @@ -19,7 +19,7 @@ import org.cbioportal.service.ClinicalDataService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; - + import com.datumbox.framework.common.dataobjects.AssociativeArray; import com.datumbox.framework.common.dataobjects.DataTable2D; import com.datumbox.framework.common.dataobjects.FlatDataCollection; diff --git a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java index a5b69c3b656..995ca2a1b59 100644 --- a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java +++ b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java @@ -4,6 +4,7 @@ import org.cbioportal.model.CopyNumberCount; import org.cbioportal.model.DiscreteCopyNumberData; import org.cbioportal.model.Gene; +import org.cbioportal.model.ReferenceGenomeGene; 
import org.cbioportal.model.meta.BaseMeta; import org.cbioportal.service.DiscreteCopyNumberService; import org.cbioportal.web.parameter.CopyNumberCountIdentifier; @@ -136,8 +137,6 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDetailedProjec .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.type").value(TEST_TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.cytoband").value(TEST_CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.chromosome").value(TEST_CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId") .value(TEST_MOLECULAR_PROFILE_STABLE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].sampleId").value(TEST_SAMPLE_STABLE_ID_2)) @@ -145,9 +144,7 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDetailedProjec .andExpect(MockMvcResultMatchers.jsonPath("$[1].alteration").value(TEST_ALTERATION_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.cytoband").value(TEST_CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.chromosome").value(TEST_CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)); } @Test @@ -230,8 +227,6 @@ public void fetchDiscreteCopyNumbersInMolecularProfileDetailedProjection() throw .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_1)) 
.andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.type").value(TEST_TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.cytoband").value(TEST_CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.chromosome").value(TEST_CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId") .value(TEST_MOLECULAR_PROFILE_STABLE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].sampleId").value(TEST_SAMPLE_STABLE_ID_2)) @@ -239,9 +234,7 @@ public void fetchDiscreteCopyNumbersInMolecularProfileDetailedProjection() throw .andExpect(MockMvcResultMatchers.jsonPath("$[1].alteration").value(TEST_ALTERATION_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.cytoband").value(TEST_CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.chromosome").value(TEST_CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)); } @Test @@ -363,15 +356,11 @@ private List createExampleDiscreteCopyNumberDataWithGene gene1.setEntrezGeneId(TEST_ENTREZ_GENE_ID_1); gene1.setHugoGeneSymbol(TEST_HUGO_GENE_SYMBOL_1); gene1.setType(TEST_TYPE_1); - gene1.setCytoband(TEST_CYTOBAND_1); - gene1.setChromosome(TEST_CHROMOSOME_1); discreteCopyNumberDataList.get(0).setGene(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(TEST_ENTREZ_GENE_ID_2); gene2.setHugoGeneSymbol(TEST_HUGO_GENE_SYMBOL_2); gene2.setType(TEST_TYPE_2); - gene2.setCytoband(TEST_CYTOBAND_2); - gene2.setChromosome(TEST_CHROMOSOME_2); discreteCopyNumberDataList.get(1).setGene(gene2); return discreteCopyNumberDataList; } diff --git a/web/src/test/java/org/cbioportal/web/GeneControllerTest.java b/web/src/test/java/org/cbioportal/web/GeneControllerTest.java 
index e201be37365..256464ae883 100644 --- a/web/src/test/java/org/cbioportal/web/GeneControllerTest.java +++ b/web/src/test/java/org/cbioportal/web/GeneControllerTest.java @@ -84,13 +84,9 @@ public void getAllGenesDefaultProjection() throws Exception { .andExpect(MockMvcResultMatchers.jsonPath("$[0].entrezGeneId").value(ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].hugoGeneSymbol").value(HUGO_GENE_SYMBOL_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].type").value(TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].cytoband").value(CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].chromosome").value(CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].hugoGeneSymbol").value(HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].type").value(TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].cytoband").value(CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].chromosome").value(CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].type").value(TYPE_2)); } @@ -127,8 +123,6 @@ public void getGene() throws Exception { gene.setEntrezGeneId(ENTREZ_GENE_ID_1); gene.setHugoGeneSymbol(HUGO_GENE_SYMBOL_1); gene.setType(TYPE_1); - gene.setCytoband(CYTOBAND_1); - gene.setChromosome(CHROMOSOME_1); geneList.add(gene); Mockito.when(geneService.getGene(Mockito.anyString())).thenReturn(gene); @@ -139,9 +133,7 @@ public void getGene() throws Exception { .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON)) .andExpect(MockMvcResultMatchers.jsonPath("$.entrezGeneId").value(ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$.hugoGeneSymbol").value(HUGO_GENE_SYMBOL_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$.type").value(TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$.cytoband").value(CYTOBAND_1)) - 
.andExpect(MockMvcResultMatchers.jsonPath("$.chromosome").value(CHROMOSOME_1)); + .andExpect(MockMvcResultMatchers.jsonPath("$.type").value(TYPE_1)); } @Test @@ -184,13 +176,9 @@ public void fetchGenesDefaultProjection() throws Exception { .andExpect(MockMvcResultMatchers.jsonPath("$[0].entrezGeneId").value(ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].hugoGeneSymbol").value(HUGO_GENE_SYMBOL_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].type").value(TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].cytoband").value(CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].chromosome").value(CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].hugoGeneSymbol").value(HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].type").value(TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].cytoband").value(CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].chromosome").value(CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].type").value(TYPE_2)); } @Test @@ -220,15 +208,11 @@ private List createGeneList() { gene1.setEntrezGeneId(ENTREZ_GENE_ID_1); gene1.setHugoGeneSymbol(HUGO_GENE_SYMBOL_1); gene1.setType(TYPE_1); - gene1.setCytoband(CYTOBAND_1); - gene1.setChromosome(CHROMOSOME_1); geneList.add(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(ENTREZ_GENE_ID_2); gene2.setHugoGeneSymbol(HUGO_GENE_SYMBOL_2); gene2.setType(TYPE_2); - gene2.setCytoband(CYTOBAND_2); - gene2.setChromosome(CHROMOSOME_2); geneList.add(gene2); return geneList; } diff --git a/web/src/test/java/org/cbioportal/web/MutationControllerTest.java b/web/src/test/java/org/cbioportal/web/MutationControllerTest.java index 3512ebd71d8..536495ed84c 100644 --- a/web/src/test/java/org/cbioportal/web/MutationControllerTest.java +++ b/web/src/test/java/org/cbioportal/web/MutationControllerTest.java @@ -4,6 +4,7 @@ import 
org.cbioportal.model.Gene; import org.cbioportal.model.Mutation; import org.cbioportal.model.MutationCountByPosition; +import org.cbioportal.model.ReferenceGenomeGene; import org.cbioportal.model.meta.MutationMeta; import org.cbioportal.service.MutationService; import org.cbioportal.web.parameter.HeaderKeyConstants; @@ -280,8 +281,6 @@ public void getMutationsInMolecularProfileBySampleListIdDetailedProjection() thr .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.type").value(TEST_TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.cytoband").value(TEST_CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.chromosome").value(TEST_CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId").value(TEST_MOLECULAR_PROFILE_STABLE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].sampleId").value(TEST_SAMPLE_STABLE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) @@ -317,9 +316,7 @@ public void getMutationsInMolecularProfileBySampleListIdDetailedProjection() thr .andExpect(MockMvcResultMatchers.jsonPath("$[1].keyword").value(TEST_KEYWORD_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.cytoband").value(TEST_CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.chromosome").value(TEST_CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)); } @Test @@ -596,8 +593,6 @@ public void 
fetchMutationsInMolecularProfileDetailedProjection() throws Exceptio .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.type").value(TEST_TYPE_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.cytoband").value(TEST_CYTOBAND_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene.chromosome").value(TEST_CHROMOSOME_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId") .value(TEST_MOLECULAR_PROFILE_STABLE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].sampleId").value(TEST_SAMPLE_STABLE_ID_2)) @@ -634,9 +629,7 @@ public void fetchMutationsInMolecularProfileDetailedProjection() throws Exceptio .andExpect(MockMvcResultMatchers.jsonPath("$[1].keyword").value(TEST_KEYWORD_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.hugoGeneSymbol").value(TEST_HUGO_GENE_SYMBOL_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.cytoband").value(TEST_CYTOBAND_2)) - .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.chromosome").value(TEST_CHROMOSOME_2)); + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene.type").value(TEST_TYPE_2)); } @Test @@ -796,15 +789,11 @@ private List createExampleMutationsWithGene() { gene1.setEntrezGeneId(TEST_ENTREZ_GENE_ID_1); gene1.setHugoGeneSymbol(TEST_HUGO_GENE_SYMBOL_1); gene1.setType(TEST_TYPE_1); - gene1.setCytoband(TEST_CYTOBAND_1); - gene1.setChromosome(TEST_CHROMOSOME_1); mutationList.get(0).setGene(gene1); Gene gene2 = new Gene(); gene2.setEntrezGeneId(TEST_ENTREZ_GENE_ID_2); gene2.setHugoGeneSymbol(TEST_HUGO_GENE_SYMBOL_2); gene2.setType(TEST_TYPE_2); - gene2.setCytoband(TEST_CYTOBAND_2); - 
gene2.setChromosome(TEST_CHROMOSOME_2); mutationList.get(1).setGene(gene2); return mutationList; diff --git a/web/src/test/java/org/cbioportal/web/ReferenceGenomeGeneControllerTest.java b/web/src/test/java/org/cbioportal/web/ReferenceGenomeGeneControllerTest.java new file mode 100644 index 00000000000..8f7d7353210 --- /dev/null +++ b/web/src/test/java/org/cbioportal/web/ReferenceGenomeGeneControllerTest.java @@ -0,0 +1,142 @@ +package org.cbioportal.web; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.cbioportal.model.ReferenceGenomeGene; +import org.cbioportal.service.ReferenceGenomeGeneService; +import org.cbioportal.service.exception.GeneNotFoundException; +import org.hamcrest.Matchers; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.MediaType; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.test.context.web.WebAppConfiguration; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import org.springframework.test.web.servlet.result.MockMvcResultMatchers; +import org.springframework.test.web.servlet.setup.MockMvcBuilders; +import org.springframework.web.context.WebApplicationContext; + +import java.util.ArrayList; +import java.util.List; + +@RunWith(SpringJUnit4ClassRunner.class) +@WebAppConfiguration +@ContextConfiguration("/applicationContext-web-test.xml") +@Configuration +public class ReferenceGenomeGeneControllerTest { + + public static final String CYTOBAND_1 = "cytoband_1"; + public static final int LENGTH_1 = 100; + public static final String CHROMOSOME_1 = "chromosome_1"; + 
public static final String CYTOBAND_2 = "cytoband_2"; + public static final int LENGTH_2 = 200; + public static final String CHROMOSOME_2 = "chromosome_2"; + public static final int REFERENCE_GENOME_ID = 1; + public static final int ENTREZ_GENE_ID_1 = 1; + public static final int ENTREZ_GENE_ID_2 = 2; + + @Autowired + private WebApplicationContext wac; + + @Autowired + private ReferenceGenomeGeneService referenceGenomeGeneService; + + @Autowired + private ObjectMapper objectMapper; + + private MockMvc mockMvc; + + @Bean + public ReferenceGenomeGeneService referenceGenomeGeneService() { + return Mockito.mock(ReferenceGenomeGeneService.class); + } + + @Before + public void setUp() throws Exception { + + Mockito.reset(referenceGenomeGeneService); + mockMvc = MockMvcBuilders.webAppContextSetup(wac).build(); + } + + @Test + public void getGene() throws Exception { + + ReferenceGenomeGene gene = new ReferenceGenomeGene(); + gene.setEntrezGeneId(ENTREZ_GENE_ID_1); + gene.setReferenceGenomeId(REFERENCE_GENOME_ID); + gene.setCytoband(CYTOBAND_1); + gene.setChromosome(CHROMOSOME_1); + gene.setLength(LENGTH_1); + + Mockito.when(referenceGenomeGeneService.getReferenceGenomeGene(Mockito.anyInt(), Mockito.anyString())).thenReturn(gene); + + mockMvc.perform(MockMvcRequestBuilders.get("/reference-genome-genes/hg19/1") + .accept(MediaType.APPLICATION_JSON)) + .andExpect(MockMvcResultMatchers.status().isOk()) + .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON)) + .andExpect(MockMvcResultMatchers.jsonPath("$.entrezGeneId").value(ENTREZ_GENE_ID_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$.referenceGenomeId").value(REFERENCE_GENOME_ID)) + .andExpect(MockMvcResultMatchers.jsonPath("$.cytoband").value(CYTOBAND_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$.length").value(LENGTH_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$.chromosome").value(CHROMOSOME_1)); + } + + + @Test + public void fetchGenesDefaultProjection() throws 
Exception { + + List geneList = createGeneList(); + + Mockito.when(referenceGenomeGeneService.fetchGenesByGenomeName(Mockito.anyListOf(Integer.class), Mockito.anyString())) + .thenReturn(geneList); + + List geneIds = new ArrayList<>(); + geneIds.add(Integer.toString(ENTREZ_GENE_ID_1)); + geneIds.add(Integer.toString(ENTREZ_GENE_ID_2)); + + mockMvc.perform(MockMvcRequestBuilders.post("/reference-genome-genes/hg19/fetch") + .accept(MediaType.APPLICATION_JSON) + .contentType(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(geneIds))) + .andExpect(MockMvcResultMatchers.status().isOk()) + .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON)) + .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(2))) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].entrezGeneId").value(ENTREZ_GENE_ID_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].referenceGenomeId").value(REFERENCE_GENOME_ID)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].cytoband").value(CYTOBAND_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].length").value(LENGTH_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].chromosome").value(CHROMOSOME_1)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(ENTREZ_GENE_ID_2)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].referenceGenomeId").value(REFERENCE_GENOME_ID)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].cytoband").value(CYTOBAND_2)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].length").value(LENGTH_2)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].chromosome").value(CHROMOSOME_2)); + } + + + private List createGeneList() { + List geneList = new ArrayList<>(); + ReferenceGenomeGene gene1 = new ReferenceGenomeGene(); + gene1.setEntrezGeneId(ENTREZ_GENE_ID_1); + gene1.setCytoband(CYTOBAND_1); + gene1.setReferenceGenomeId(REFERENCE_GENOME_ID); + gene1.setChromosome(CHROMOSOME_1); + gene1.setLength(LENGTH_1); + geneList.add(gene1); + 
ReferenceGenomeGene gene2 = new ReferenceGenomeGene(); + gene2.setEntrezGeneId(ENTREZ_GENE_ID_2); + gene2.setReferenceGenomeId(REFERENCE_GENOME_ID); + gene2.setCytoband(CYTOBAND_2); + gene2.setLength(LENGTH_2); + gene2.setChromosome(CHROMOSOME_2); + geneList.add(gene2); + return geneList; + } +} + diff --git a/web/src/test/java/org/cbioportal/weblegacy/ApiControllerTest.java b/web/src/test/java/org/cbioportal/weblegacy/ApiControllerTest.java index 1372ea0a5b6..672f473425c 100644 --- a/web/src/test/java/org/cbioportal/weblegacy/ApiControllerTest.java +++ b/web/src/test/java/org/cbioportal/weblegacy/ApiControllerTest.java @@ -549,12 +549,10 @@ private List getGeneticprofiledataQuery1PersistenceFullMock() { gene_AKT1.setHugoGeneSymbol("AKT1"); gene_AKT1.setEntrezGeneId(207); gene_AKT1.setType("protein-coding"); - gene_AKT1.setCytoband("14q32.32"); Gene gene_TGFBR1 = new Gene(); gene_TGFBR1.setHugoGeneSymbol("TGFBR1"); gene_TGFBR1.setEntrezGeneId(7046); gene_TGFBR1.setType("protein-coding"); - gene_TGFBR1.setCytoband("9q22"); TypeOfCancer typeOfCancer_brca = new TypeOfCancer(); typeOfCancer_brca.setTypeOfCancerId("brca"); typeOfCancer_brca.setName("Invasive Breast Carcinoma"); @@ -1079,7 +1077,6 @@ private void applyNullMaskToMutationModel(Mutation mutation) { mutation.getGeneticProfile().setShowProfileInAnalysisTab(null); mutation.setSample(null); //class object mutation.getGene().setType(null); - mutation.getGene().setCytoband(null); } private List getGeneticprofiledataQuery1ServiceMock() { diff --git a/web/src/test/java/org/cbioportal/weblegacy/GenePanelControllerLegacyTest.java b/web/src/test/java/org/cbioportal/weblegacy/GenePanelControllerLegacyTest.java index 536e4af9f60..0c960bdf53e 100644 --- a/web/src/test/java/org/cbioportal/weblegacy/GenePanelControllerLegacyTest.java +++ b/web/src/test/java/org/cbioportal/weblegacy/GenePanelControllerLegacyTest.java @@ -93,12 +93,10 @@ public void setup() { braf.setEntrezGeneId(673); braf.setHugoGeneSymbol("BRAF"); 
braf.setType("protein-coding"); - braf.setCytoband("7q34"); egfr = new Gene(); egfr.setEntrezGeneId(1956); egfr.setHugoGeneSymbol("EGFR"); egfr.setType("protein-coding"); - egfr.setCytoband("7p12"); genes.add(braf); genes.add(egfr); diff --git a/web/src/test/java/org/cbioportal/weblegacy/StructuralVariantControllerTest.java b/web/src/test/java/org/cbioportal/weblegacy/StructuralVariantControllerTest.java new file mode 100644 index 00000000000..5f99df40a08 --- /dev/null +++ b/web/src/test/java/org/cbioportal/weblegacy/StructuralVariantControllerTest.java @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2016 - 2018 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package org.cbioportal.weblegacy; + +import org.cbioportal.web.config.CacheMapUtilConfig; +import java.util.ArrayList; +import java.util.List; + +import org.cbioportal.model.CancerStudy; +import org.cbioportal.model.Gene; +import org.cbioportal.model.MolecularProfile; +import org.cbioportal.model.Patient; +import org.cbioportal.model.Sample; +import org.mskcc.cbio.portal.model.StructuralVariant; +import org.mskcc.cbio.portal.service.StructuralVariantService; +import org.cbioportal.web.config.CustomObjectMapper; +import org.hamcrest.Matchers; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.MediaType; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.test.context.web.WebAppConfiguration; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.ResultActions; +import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import org.springframework.test.web.servlet.result.MockMvcResultMatchers; +import org.springframework.test.web.servlet.setup.MockMvcBuilders; +import org.springframework.web.context.WebApplicationContext; + +@RunWith(SpringJUnit4ClassRunner.class) +@WebAppConfiguration +@ContextConfiguration(classes = {StructuralVariantControllerTestConfig.class, CustomObjectMapper.class, CacheMapUtilConfig.class}) +public class StructuralVariantControllerTest { + @Autowired + private StructuralVariantService structuralVariantServiceMock; + @Autowired + private WebApplicationContext webApplicationContext; + private MockMvc mockMvc; + private static List structuralVariantDataServiceFullResponseMock; + private
static List structuralVariantDataServiceEmptyResponseMock; + + @Before + public void setup() { + Mockito.reset(structuralVariantServiceMock); + mockMvc = MockMvcBuilders.webAppContextSetup(webApplicationContext).build(); + } + + @Test + public void svDataTest1() throws Exception { + List mockResponse = getStructuralVariantDataServiceFullResponseMock(); + Mockito.when(structuralVariantServiceMock.getStructuralVariant( + org.mockito.Matchers.anyListOf(String.class), + org.mockito.Matchers.anyListOf(String.class), + org.mockito.Matchers.anyListOf(String.class) + )).thenReturn(mockResponse); + ResultActions resultActions = this.mockMvc.perform( + MockMvcRequestBuilders.get("/structuralvariant") + .accept(MediaType.parseMediaType("application/json;charset=UTF-8")) + .param("geneticProfileStableIds", "7") + .param("hugoGeneSymbols", "ERBB2,GRB7") + .param("sampleStableIds", "TCGA-A1-A0SB-01,TCGA-A1-A0SD-01,TCGA-A1-A0SE-01") + ) + // note: an attempt was made to test the proper parsing of various parameter combinations, but strings are not parsed until the request is performed + //.andDo(MockMvcResultHandlers.print()); + ; + testFullResponse(resultActions); + } + + @Test + public void svDataTest2() throws Exception { + List mockResponse = getStructuralVariantDataServiceEmptyResponseMock(); + Mockito.when(structuralVariantServiceMock.getStructuralVariant( + org.mockito.Matchers.anyListOf(String.class), + org.mockito.Matchers.anyListOf(String.class), + org.mockito.Matchers.anyListOf(String.class) + )).thenReturn(mockResponse); + ResultActions resultActions = this.mockMvc.perform( + MockMvcRequestBuilders.get("/structuralvariant") + .accept("application/json;charset=UTF-8") + .param("geneticProfileStableIds", "7") + .param("hugoGeneSymbols", "unrecognized_gene_identifier") + .param("sampleStableIds", "") + ); + testEmptyResponse(resultActions); + } + + private void testFullResponse(ResultActions resultActions) throws Exception { + resultActions + 
.andExpect(MockMvcResultMatchers.status().isOk()) + .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith("application/json;charset=UTF-8")) + .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(3))) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].breakpointType").value("PRECISE")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].annotation").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].comments").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].confidenceClass").value("AUTO_OK")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].connectionType").value("3to5")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].eventInfo").value("Transcript fusion (ERBB2-GRB7)")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].mapq").value("0")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].normalReadCount").value(7181)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].normalVariantCount").value(0)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].pairedEndReadSupport").value(11)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site1Chrom").value("17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site1Desc").value("Intron of ERBB2(+): 51bp after exon 26")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site1Gene").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site1Pos").value(37883851)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site2Chrom").value("17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site2Desc").value("5-UTR of GRB7(+): 1Kb before coding start")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site2Gene").value("GRB7")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].site2Pos").value(37897379)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].splitReadSupport").value(31)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].svClassName").value("DELETION")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].svDesc").value("n/a")) + 
.andExpect(MockMvcResultMatchers.jsonPath("$[0].svLength").value(13528)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].tumorReadCount").value(4389)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].tumorVariantCount").value(6)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].variantStatusName").value("NEW_VARIANT")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene1.hugoGeneSymbol").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene1.entrezGeneId").value(2064)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene2.hugoGeneSymbol").value("GRB7")) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene2.entrezGeneId").value(2886)) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].sample.stableId").value("TCGA-A1-A0SB-01")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].breakpointType").value("PRECISE")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].annotation").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].comments").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].confidenceClass").value("AUTO_OK")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].connectionType").value("3to5")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].eventInfo").value("Protein fusion: mid-exon (ERBB2-GRB7)")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].mapq").value("0")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].normalReadCount").value(7062)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].normalVariantCount").value(0)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].pairedEndReadSupport").value(97)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site1Chrom").value("17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site1Desc").value("Exon 25 of ERBB2(+)")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site1Gene").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site1Pos").value(37883138)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site2Chrom").value("17")) + 
.andExpect(MockMvcResultMatchers.jsonPath("$[1].site2Desc").value("Intron of GRB7(+): 56bp before exon 10")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site2Gene").value("GRB7")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].site2Pos").value(37901416)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].splitReadSupport").value(71)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].svClassName").value("DELETION")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].svDesc").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].svLength").value(18278)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].tumorReadCount").value(9849)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].tumorVariantCount").value(60)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].variantStatusName").value("NEW_VARIANT")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene1.hugoGeneSymbol").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene1.entrezGeneId").value(2064)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene2.hugoGeneSymbol").value("GRB7")) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene2.entrezGeneId").value(2886)) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].sample.stableId").value("TCGA-A1-A0SD-01")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].breakpointType").value("PRECISE")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].annotation").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].comments").value("ERBB2 (NM_004448) rearrangement")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].confidenceClass").value("MANUAL_OK")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].connectionType").value("3to5")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].eventInfo").value("Deletion of 1 exon: in frame")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].mapq").value("0")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].normalReadCount").value(7212)) + 
.andExpect(MockMvcResultMatchers.jsonPath("$[2].normalVariantCount").value(0)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].pairedEndReadSupport").value(31)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site1Chrom").value("17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site1Desc").value("Intron of ERBB2(+): 46bp after exon 15")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site1Gene").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site1Pos").value(37873779)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site2Chrom").value("17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site2Desc").value("Intron of ERBB2(+): 501bp before exon 17")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site2Gene").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].site2Pos").value(37879041)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].splitReadSupport").value(10)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].svClassName").value("DELETION")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].svDesc").value("n/a")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].svLength").value(5262)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].tumorReadCount").value(3101)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].tumorVariantCount").value(7)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].variantStatusName").value("NEW_VARIANT")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].gene1.hugoGeneSymbol").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].gene1.entrezGeneId").value(2064)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].gene2.hugoGeneSymbol").value("ERBB2")) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].gene2.entrezGeneId").value(2064)) + .andExpect(MockMvcResultMatchers.jsonPath("$[2].sample.stableId").value("TCGA-A1-A0SE-01")) + ; + } + + private void testEmptyResponse(ResultActions resultActions) throws Exception { + resultActions + 
.andExpect(MockMvcResultMatchers.status().isOk()) + .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith("application/json;charset=UTF-8")) + .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(0))) + ; + } + + private List getStructuralVariantDataServiceFullResponseMock() { + if(structuralVariantDataServiceFullResponseMock != null) { + return structuralVariantDataServiceFullResponseMock; + } + //data from persistence-mybatis-test test + structuralVariantDataServiceFullResponseMock = new ArrayList<>(); + CancerStudy cancerStudy1 = new CancerStudy(); + cancerStudy1.setCancerStudyId(1); + cancerStudy1.setCancerStudyIdentifier("study_tcga_pub"); + cancerStudy1.setTypeOfCancerId("brca"); + cancerStudy1.setName("Breast Invasive Carcinoma (TCGA, Nature 2012)"); + cancerStudy1.setShortName("BRCA (TCGA)"); + cancerStudy1.setDescription("The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal"); + cancerStudy1.setPublicStudy(true); + cancerStudy1.setPmid("23000897"); + cancerStudy1.setCitation("TCGA, Nature 2012"); + cancerStudy1.setGroups("PUBLIC"); + cancerStudy1.setStatus(0); + cancerStudy1.setImportDate(null); + Patient patient1 = new Patient(); + patient1.setCancerStudy(cancerStudy1); + patient1.setCancerStudyId(1); + patient1.setInternalId(1); + patient1.setStableId("TCGA-A1-A0SB"); + Patient patient2 = new Patient(); + patient2.setCancerStudy(cancerStudy1); + patient2.setCancerStudyId(1); + patient2.setInternalId(2); + patient2.setStableId("TCGA-A1-A0SD"); + Patient patient3 = new Patient(); + patient3.setCancerStudy(cancerStudy1); + patient3.setCancerStudyId(1); + patient3.setInternalId(3); + patient3.setStableId("TCGA-A1-A0SE"); + Sample sample1 = new Sample(); + sample1.setInternalId(1); + sample1.setStableId("TCGA-A1-A0SB-01"); + sample1.setSampleType(Sample.SampleType.PRIMARY_SOLID_TUMOR); + sample1.setPatientId(1); + sample1.setPatient(patient1); + Sample sample2 = new Sample(); + sample2.setInternalId(2); + sample2.setStableId("TCGA-A1-A0SD-01"); + sample2.setSampleType(Sample.SampleType.PRIMARY_SOLID_TUMOR); + sample2.setPatientId(2); + sample2.setPatient(patient2); + Sample sample3 = new Sample(); + sample3.setInternalId(3); + sample3.setStableId("TCGA-A1-A0SE-01"); + sample3.setSampleType(Sample.SampleType.PRIMARY_SOLID_TUMOR); + sample3.setPatientId(3); + sample3.setPatient(patient3); + MolecularProfile geneticProfile1 = new MolecularProfile(); + geneticProfile1.setMolecularProfileId(7); + geneticProfile1.setStableId("study_tcga_pub_sv"); + geneticProfile1.setCancerStudy(cancerStudy1); + geneticProfile1.setMolecularAlterationType(MolecularProfile.MolecularAlterationType.STRUCTURAL_VARIANT); + geneticProfile1.setDatatype("SV"); + geneticProfile1.setName("Structural Variants"); + geneticProfile1.setDescription("Structural Variants detected by Illumina HiSeq sequencing."); + 
geneticProfile1.setShowProfileInAnalysisTab(true); + Gene geneERBB2 = new Gene(); + geneERBB2.setHugoGeneSymbol("ERBB2"); + geneERBB2.setEntrezGeneId(2064); + geneERBB2.setType("protein-coding"); + Gene geneGRB7 = new Gene(); + geneGRB7.setHugoGeneSymbol("GRB7"); + geneGRB7.setEntrezGeneId(2886); + geneGRB7.setType("protein-coding"); + StructuralVariant structuralVariant1 = new StructuralVariant(); + structuralVariant1.setSampleId(1); + structuralVariant1.setSample(sample1); + structuralVariant1.setBreakpointType("PRECISE"); + structuralVariant1.setAnnotation("n/a"); + structuralVariant1.setComments("n/a"); + structuralVariant1.setConfidenceClass("AUTO_OK"); + structuralVariant1.setConnectionType("3to5"); + structuralVariant1.setEventInfo("Transcript fusion (ERBB2-GRB7)"); + structuralVariant1.setMapq("0"); + structuralVariant1.setNormalReadCount(7181); + structuralVariant1.setNormalVariantCount(0); + structuralVariant1.setPairedEndReadSupport(11); + structuralVariant1.setSite1Chrom("17"); + structuralVariant1.setSite1Desc("Intron of ERBB2(+): 51bp after exon 26"); + structuralVariant1.setSite1Gene("ERBB2"); + structuralVariant1.setSite1Pos(37883851); + structuralVariant1.setGene1(geneERBB2); + structuralVariant1.setSite2Chrom("17"); + structuralVariant1.setSite2Desc("5-UTR of GRB7(+): 1Kb before coding start"); + structuralVariant1.setSite2Gene("GRB7"); + structuralVariant1.setSite2Pos(37897379); + structuralVariant1.setGene2(geneGRB7); + structuralVariant1.setSplitReadSupport(31); + structuralVariant1.setSvClassName("DELETION"); + structuralVariant1.setSvDesc("n/a"); + structuralVariant1.setSvLength(13528); + structuralVariant1.setTumorReadCount(4389); + structuralVariant1.setTumorVariantCount(6); + structuralVariant1.setVariantStatusName("NEW_VARIANT"); + structuralVariant1.setGeneticProfileId(7); + structuralVariant1.setGeneticProfile(geneticProfile1); + structuralVariantDataServiceFullResponseMock.add(structuralVariant1); + StructuralVariant structuralVariant2 
= new StructuralVariant(); + structuralVariant2.setSampleId(2); + structuralVariant2.setSample(sample2); + structuralVariant2.setBreakpointType("PRECISE"); + structuralVariant2.setAnnotation("n/a"); + structuralVariant2.setComments("n/a"); + structuralVariant2.setConfidenceClass("AUTO_OK"); + structuralVariant2.setConnectionType("3to5"); + structuralVariant2.setEventInfo("Protein fusion: mid-exon (ERBB2-GRB7)"); + structuralVariant2.setMapq("0"); + structuralVariant2.setNormalReadCount(7062); + structuralVariant2.setNormalVariantCount(0); + structuralVariant2.setPairedEndReadSupport(97); + structuralVariant2.setSite1Chrom("17"); + structuralVariant2.setSite1Desc("Exon 25 of ERBB2(+)"); + structuralVariant2.setSite1Gene("ERBB2"); + structuralVariant2.setSite1Pos(37883138); + structuralVariant2.setGene1(geneERBB2); + structuralVariant2.setSite2Chrom("17"); + structuralVariant2.setSite2Desc("Intron of GRB7(+): 56bp before exon 10"); + structuralVariant2.setSite2Gene("GRB7"); + structuralVariant2.setSite2Pos(37901416); + structuralVariant2.setGene2(geneGRB7); + structuralVariant2.setSplitReadSupport(71); + structuralVariant2.setSvClassName("DELETION"); + structuralVariant2.setSvDesc("n/a"); + structuralVariant2.setSvLength(18278); + structuralVariant2.setTumorReadCount(9849); + structuralVariant2.setTumorVariantCount(60); + structuralVariant2.setVariantStatusName("NEW_VARIANT"); + structuralVariant2.setGeneticProfileId(7); + structuralVariant2.setGeneticProfile(geneticProfile1); + structuralVariantDataServiceFullResponseMock.add(structuralVariant2); + StructuralVariant structuralVariant3 = new StructuralVariant(); + structuralVariant3.setSampleId(3); + structuralVariant3.setSample(sample3); + structuralVariant3.setBreakpointType("PRECISE"); + structuralVariant3.setAnnotation("n/a"); + structuralVariant3.setComments("ERBB2 (NM_004448) rearrangement"); + structuralVariant3.setConfidenceClass("MANUAL_OK"); + structuralVariant3.setConnectionType("3to5"); + 
structuralVariant3.setEventInfo("Deletion of 1 exon: in frame"); + structuralVariant3.setMapq("0"); + structuralVariant3.setNormalReadCount(7212); + structuralVariant3.setNormalVariantCount(0); + structuralVariant3.setPairedEndReadSupport(31); + structuralVariant3.setSite1Chrom("17"); + structuralVariant3.setSite1Desc("Intron of ERBB2(+): 46bp after exon 15"); + structuralVariant3.setSite1Gene("ERBB2"); + structuralVariant3.setSite1Pos(37873779); + structuralVariant3.setGene1(geneERBB2); + structuralVariant3.setSite2Chrom("17"); + structuralVariant3.setSite2Desc("Intron of ERBB2(+): 501bp before exon 17"); + structuralVariant3.setSite2Gene("ERBB2"); + structuralVariant3.setSite2Pos(37879041); + structuralVariant3.setGene2(geneERBB2); + structuralVariant3.setSplitReadSupport(10); + structuralVariant3.setSvClassName("DELETION"); + structuralVariant3.setSvDesc("n/a"); + structuralVariant3.setSvLength(5262); + structuralVariant3.setTumorReadCount(3101); + structuralVariant3.setTumorVariantCount(7); + structuralVariant3.setVariantStatusName("NEW_VARIANT"); + structuralVariant3.setGeneticProfileId(7); + structuralVariant3.setGeneticProfile(geneticProfile1); + structuralVariantDataServiceFullResponseMock.add(structuralVariant3); + //mask out values not normally returned by service layer + cancerStudy1.setCancerStudyId(null); + cancerStudy1.setTypeOfCancerId(null); + cancerStudy1.setCancerStudyIdentifier(null); + cancerStudy1.setName(null); + cancerStudy1.setShortName(null); + cancerStudy1.setDescription(null); + cancerStudy1.setPublicStudy(null); + cancerStudy1.setPmid(null); + cancerStudy1.setCitation(null); + cancerStudy1.setGroups(null); + cancerStudy1.setStatus(null); + cancerStudy1.setImportDate(null); + patient1.setCancerStudyId(null); + patient1.setInternalId(null); + patient1.setStableId(null); + patient2.setCancerStudyId(null); + patient2.setInternalId(null); + patient2.setStableId(null); + patient3.setCancerStudyId(null); + patient3.setInternalId(null); + 
patient3.setStableId(null); + sample1.setInternalId(null); + sample1.setSampleType(null); + sample1.setPatientId(null); + sample2.setInternalId(null); + sample2.setSampleType(null); + sample2.setPatientId(null); + sample3.setInternalId(null); + sample3.setSampleType(null); + sample3.setPatientId(null); + geneticProfile1.setMolecularProfileId(null); + geneticProfile1.setStableId(null); + geneticProfile1.setMolecularAlterationType(null); + geneticProfile1.setDatatype(null); + geneticProfile1.setName(null); + geneticProfile1.setDescription(null); + geneticProfile1.setShowProfileInAnalysisTab(null); + geneERBB2.setType(null); + geneGRB7.setType(null); + + structuralVariant1.setGeneticProfileId(null); + structuralVariant2.setGeneticProfileId(null); + structuralVariant3.setGeneticProfileId(null); + return structuralVariantDataServiceFullResponseMock; + } + + private List getStructuralVariantDataServiceEmptyResponseMock() { + if(structuralVariantDataServiceEmptyResponseMock != null) { + return structuralVariantDataServiceEmptyResponseMock; + } + structuralVariantDataServiceEmptyResponseMock = new ArrayList<>(); + return structuralVariantDataServiceEmptyResponseMock; + } +} From b1b65c78668ebdfbb9d6921ccade66d170c04025 Mon Sep 17 00:00:00 2001 From: khzhu Date: Fri, 19 Apr 2019 17:13:31 -0400 Subject: [PATCH 4/9] resolving code review comments/suggestions --- core/pom.xml | 10 +- .../mskcc/cbio/portal/dao/DaoCancerStudy.java | 34 +-- .../cbio/portal/dao/DaoGeneOptimized.java | 31 +-- .../cbio/portal/dao/DaoReferenceGenome.java | 86 ++----- .../portal/dao/DaoReferenceGenomeGene.java | 124 ++-------- .../mskcc/cbio/portal/dao/JdbcDataSource.java | 3 +- .../org/mskcc/cbio/portal/dao/JdbcUtil.java | 24 +- .../cbio/portal/model/ReferenceGenome.java | 6 +- .../portal/model/ReferenceGenomeGene.java | 35 ++- .../cbio/portal/scripts/ImportCosmicData.java | 2 +- .../scripts/ImportExtendedMutationData.java | 24 +- .../cbio/portal/scripts/ImportFusionData.java | 2 +- 
.../cbio/portal/scripts/ImportGeneData.java | 226 +++++++++++++----- .../mskcc/cbio/portal/scripts/ImportHprd.java | 4 +- .../scripts/ImportPathwayCommonsExtSif.java | 4 +- .../scripts/ImportProteinArrayData.java | 2 +- .../portal/scripts/ImportReferenceGenome.java | 15 +- .../mskcc/cbio/portal/scripts/ImportSif.java | 4 +- .../portal/scripts/ImportTabDelimData.java | 2 +- .../portal/servlet/GetCoExpressionJSON.java | 2 +- .../cbio/portal/util/CancerStudyReader.java | 23 +- .../portal/util/EnrichmentsAnalysisUtil.java | 2 +- .../cbio/portal/util/GlobalProperties.java | 8 +- .../main/scripts/importer/cbio_importer.py | 185 -------------- core/src/main/scripts/importer/metaImport.py | 8 +- .../src/main/scripts/importer/validateData.py | 64 +++-- .../cbio/portal/dao/TestDaoCancerStudy.java | 2 + .../scripts/TestImportCaisesClinicalXML.java | 1 + .../scripts/TestImportClinicalData.java | 1 + .../TestImportCopyNumberSegmentData.java | 3 +- .../portal/scripts/TestImportGeneData.java | 6 +- .../scripts/TestImportReferenceGenome.java | 39 --- .../portal/web_api/TestGetTypesOfCancer.java | 1 + core/src/test/resources/reference_genomes.txt | 2 - core/src/test/resources/seed_mini.sql | 2 +- .../resources/adjust_col_size_to_utf8.sql | 16 -- db-scripts/src/main/resources/cgds.sql | 3 +- db-scripts/src/main/resources/migration.sql | 25 ++ docs/Using-the-dataset-validator.md | 30 +-- docs/Using-the-metaImport-script.md | 10 +- .../ReferenceGenomeGeneRepository.java | 31 --- .../ReferenceGenomeGeneMyBatisRepository.java | 1 - .../mybatis/ReferenceGenomeGeneMapper.xml | 1 - ...screteCopyNumberMyBatisRepositoryTest.java | 7 +- ...MolecularProfileMyBatisRepositoryTest.java | 8 +- .../mybatis/PatientMyBatisRepositoryTest.java | 8 +- ...erenceGenomeGeneMyBatisRepositoryTest.java | 38 --- .../SampleListMyBatisRepositoryTest.java | 12 +- .../mybatis/SampleMyBatisRepositoryTest.java | 12 +- .../mybatis/StudyMyBatisRepositoryTest.java | 16 +- .../src/test/resources/testSql.sql | 2 +- 
pom.xml | 6 +- .../service/impl/CoExpressionServiceImpl.java | 6 +- .../service/impl/GeneServiceImpl.java | 7 +- .../service/impl/MutationServiceImpl.java | 9 +- .../impl/ReferenceGenomeGeneServiceImpl.java | 1 - .../service/util/ChromosomeCalculator.java | 2 - .../impl/CoExpressionServiceImplTest.java | 4 +- .../DiscreteCopyNumberServiceImplTest.java | 7 +- .../service/impl/MutationServiceImplTest.java | 12 +- .../util/AlterationEnrichmentUtilTest.java | 4 +- .../web/ReferenceGenomeGeneController.java | 18 +- .../web/mixin/CancerStudyMixin.java | 2 + .../cbioportal/web/mixin/MutationMixin.java | 2 + .../web/mixin/ReferenceGenomeGeneMixin.java | 9 + .../web/util/ClinicalDataEnrichmentUtil.java | 2 +- .../ReferenceGenomeGeneControllerTest.java | 5 +- 67 files changed, 489 insertions(+), 814 deletions(-) delete mode 100644 core/src/main/scripts/importer/cbio_importer.py delete mode 100644 db-scripts/src/main/resources/adjust_col_size_to_utf8.sql create mode 100644 web/src/main/java/org/cbioportal/web/mixin/ReferenceGenomeGeneMixin.java diff --git a/core/pom.xml b/core/pom.xml index 905411c7f8f..fd4aeeea528 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -65,7 +65,7 @@ org.apache.commons commons-dbcp2 - 2.4.0 + 2.1.1 commons-fileupload @@ -293,7 +293,7 @@ 2.16 always - + true log4j.configuration @@ -353,7 +353,7 @@ mysql mysql-connector-java - 8.0.11 + 5.0.3 @@ -369,7 +369,7 @@ ${db.test.url} ${db.test.username} ${db.test.password} - SET default_storage_engine=INNODB + SET storage_engine=INNODB SET SESSION sql_mode = 'ANSI_QUOTES' ${project.build.testOutputDirectory}/cgds.sql @@ -377,7 +377,7 @@ UTF-8 - + characterEncoding=utf8, connectionCollation=utf8_general_ci ${skipTests} diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java index eddc1ba0f05..0d559831846 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java +++ 
b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java @@ -95,7 +95,7 @@ private static synchronized void reCache() { CancerStudy cancerStudy = extractCancerStudy(rs); cacheCancerStudy(cancerStudy, new java.util.Date()); } - } catch (SQLException e) { + } catch (SQLException | DaoException e) { e.printStackTrace(); } finally { JdbcUtil.closeAll(DaoCancerStudy.class, con, pstmt, rs); @@ -324,7 +324,7 @@ public static void addCancerStudy(CancerStudy cancerStudy, boolean overwrite) th pstmt.setInt(12, referenceGenome.getReferenceGenomeId()); } catch (NullPointerException e) { - pstmt.setInt(12,1); //#TODO default reference genome to use + throw new DaoException("Unsupported reference genome"); } pstmt.executeUpdate(); rs = pstmt.getGeneratedKeys(); @@ -619,25 +619,25 @@ public static void purgeUnreferencedRecordsAfterDeletionOfStudy() throws DaoExce /** * Extracts Cancer Study JDBC Results. */ - private static CancerStudy extractCancerStudy(ResultSet rs) throws SQLException { - CancerStudy cancerStudy = new CancerStudy(rs.getString("NAME"), - rs.getString("DESCRIPTION"), - rs.getString("CANCER_STUDY_IDENTIFIER"), - rs.getString("TYPE_OF_CANCER_ID"), - rs.getBoolean("PUBLIC")); - cancerStudy.setPmid(rs.getString("PMID")); - cancerStudy.setCitation(rs.getString("CITATION")); - cancerStudy.setGroupsInUpperCase(rs.getString("GROUPS")); - cancerStudy.setShortName(rs.getString("SHORT_NAME")); - cancerStudy.setInternalId(rs.getInt("CANCER_STUDY_ID")); - cancerStudy.setImportDate(rs.getDate("IMPORT_DATE")); + private static CancerStudy extractCancerStudy(ResultSet rs) throws DaoException { try { + CancerStudy cancerStudy = new CancerStudy(rs.getString("NAME"), + rs.getString("DESCRIPTION"), + rs.getString("CANCER_STUDY_IDENTIFIER"), + rs.getString("TYPE_OF_CANCER_ID"), + rs.getBoolean("PUBLIC")); + cancerStudy.setPmid(rs.getString("PMID")); + cancerStudy.setCitation(rs.getString("CITATION")); + cancerStudy.setGroupsInUpperCase(rs.getString("GROUPS")); + 
cancerStudy.setShortName(rs.getString("SHORT_NAME")); + cancerStudy.setInternalId(rs.getInt("CANCER_STUDY_ID")); + cancerStudy.setImportDate(rs.getDate("IMPORT_DATE")); cancerStudy.setReferenceGenome(DaoReferenceGenome.getReferenceGenomeByInternalId( rs.getInt("REFERENCE_GENOME_ID")).getGenomeName()); - } catch (DaoException e) { - cancerStudy.setReferenceGenome(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME); + return cancerStudy; + } catch (SQLException e) { + throw new DaoException(e); } - return cancerStudy; } private static boolean studyNeedsRecaching(String stableId, Integer ... internalId) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java index 20c829f542e..74ee0c3c756 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java @@ -285,7 +285,7 @@ public CanonicalGene getGene(long entrezId) { /** * Gets Gene By Entrez Gene ID. * - * @param entrezId Entrez Gene ID. + * @param geneticEntityId Gene Entity ID. * @return Canonical Gene Object. */ public CanonicalGene getGeneByEntityId(int geneticEntityId) { @@ -335,24 +335,8 @@ public List guessGene(String geneId, String chr) { List genes = geneAliasMap.get(geneId.toUpperCase()); if (genes!=null) { - if (chr==null) { - return Collections.unmodifiableList(genes); - } - - String nchr = normalizeChr(chr); - - List ret = new ArrayList(); - for (CanonicalGene cg : genes) { - //String gchr = getChrFromCytoband(cg.getCytoband()); - String gchr = null; - if (gchr==null // TODO: should we exlude this? - || gchr.equals(nchr)) { - ret.add(cg); - } - } - return ret; + return Collections.unmodifiableList(genes); } - return Collections.emptyList(); } @@ -408,7 +392,7 @@ private static String getChrFromCytoband(String cytoband) { * @return a gene that can be non-ambiguously determined, or null if cannot. 
*/ public CanonicalGene getNonAmbiguousGene(String geneId) { - return getNonAmbiguousGene(geneId, null); + return getNonAmbiguousGene(geneId, true); } /** @@ -418,19 +402,18 @@ public CanonicalGene getNonAmbiguousGene(String geneId) { * @return a gene that can be non-ambiguously determined, or null if cannot. */ public CanonicalGene getNonAmbiguousGene(String geneId, String chr) { - return getNonAmbiguousGene(geneId, chr, true); + return getNonAmbiguousGene(geneId, true); } /** * Look for gene that can be non-ambiguously determined * @param geneId an Entrez Gene ID or HUGO symbol or gene alias - * @param chr chromosome - * @param issueWarning if true and gene is not ambiguous, + * @param issueWarning if true and gene is not ambiguous, * print all the Entrez Ids corresponding to the geneId provided * @return a gene that can be non-ambiguously determined, or null if cannot. */ - public CanonicalGene getNonAmbiguousGene(String geneId, String chr, boolean issueWarning) { - List genes = guessGene(geneId, chr); + public CanonicalGene getNonAmbiguousGene(String geneId, boolean issueWarning) { + List genes = guessGene(geneId); if (genes.isEmpty()) { return null; } diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java index 6fa2864ac5f..1d9bb4990d5 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java @@ -74,11 +74,9 @@ private static synchronized void reCache() { JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); } } - - + /** * Add a new reference genome to the Database. - * * @param referenceGenome Reference Genome. * @throws DaoException Database Error. 
*/ @@ -120,15 +118,17 @@ public static void addReferenceGenome(ReferenceGenome referenceGenome, boolean o pstmt.setLong(4, referenceGenome.getGenomeSize()); pstmt.setString(5, referenceGenome.getUrl()); pstmt.setDate(6, new java.sql.Date(referenceGenome.getReleaseDate().getTime())); - - pstmt.executeUpdate(); - rs = pstmt.getGeneratedKeys(); - if (rs.next()) { - int autoId = rs.getInt(1); - referenceGenome.setReferenceGenomeId(autoId); + if (pstmt.executeUpdate() != 0 ) { + rs = pstmt.getGeneratedKeys(); + if (rs.next()) { + int autoId = rs.getInt(1); + referenceGenome.setReferenceGenomeId(autoId); + } + // update reference cache + addCache(referenceGenome); + } else { + throw new DaoException("attempt to add new referenceGenome record failed"); } - // update reference cache - addCache(referenceGenome); } catch (SQLException e) { throw new DaoException(e); } finally { @@ -142,7 +142,6 @@ public static void addReferenceGenome(ReferenceGenome referenceGenome, boolean o * @throws DaoException Database Error. 
*/ public static void deleteAllRecords() throws DaoException { - Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; @@ -166,7 +165,7 @@ public static void deleteAllRecords() throws DaoException { * @param referenceGenome Reference Genome Object * @throws DaoException */ - public static int updateReferenceGenome(ReferenceGenome referenceGenome) throws DaoException { + public static void updateReferenceGenome(ReferenceGenome referenceGenome) throws DaoException { ReferenceGenome existing = getReferenceGenomeByInternalId(referenceGenome.getReferenceGenomeId()); if (existing==null) { @@ -175,7 +174,6 @@ public static int updateReferenceGenome(ReferenceGenome referenceGenome) throws Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; - int rows = 0; try { con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); pstmt = con.prepareStatement("UPDATE reference_genome " + @@ -189,10 +187,10 @@ public static int updateReferenceGenome(ReferenceGenome referenceGenome) throws pstmt.setString(5, referenceGenome.getUrl()); pstmt.setDate(6, new java.sql.Date(referenceGenome.getReleaseDate().getTime())); pstmt.setInt(7, referenceGenome.getReferenceGenomeId()); - rows += pstmt.executeUpdate(); - // update reference cache - reCache(); - return rows; + if (pstmt.executeUpdate() != 0) { + // update reference cache + reCache(); + } } catch (SQLException e) { throw new DaoException(e); } finally { @@ -220,8 +218,7 @@ public static ReferenceGenome getReferenceGenomeByInternalId(int internalId) thr public static ReferenceGenome getReferenceGenomeByBuildName(String buildName) throws DaoException { return byGenomeBuild.get(buildName); } - - + /** * Retrieve reference genome by genome build name * @param genomeName Reference Genome build name @@ -229,25 +226,7 @@ public static ReferenceGenome getReferenceGenomeByBuildName(String buildName) th */ public static ReferenceGenome getReferenceGenomeByGenomeName(String genomeName) throws DaoException 
{ - //return byGenomeName.get(genomeName); - Connection con = null; - PreparedStatement pstmt = null; - ResultSet rs = null; - ReferenceGenome referenceGenome = null; - try { - con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); - pstmt = con.prepareStatement("SELECT * FROM reference_genome WHERE `name` = ?"); - pstmt.setString(1,genomeName); - rs = pstmt.executeQuery(); - if (rs.next()) { - referenceGenome = extractReferenceGenome(rs); - } - } catch (SQLException e) { - e.printStackTrace(); - } finally { - JdbcUtil.closeAll(DaoReferenceGenome.class, con, pstmt, rs); - return referenceGenome; - } + return byGenomeName.get(genomeName); } /** @@ -255,28 +234,12 @@ public static ReferenceGenome getReferenceGenomeByGenomeName(String genomeName) * @param name Name of Reference Genome or Genome Assembly * @throws DaoException Database Error. */ - @Deprecated public static int getReferenceGenomeIdByName(String name) throws DaoException { - return getReferenceGenomeIdByName(name, ReferenceGenome.HOMO_SAPIENS); - } - - /** - * Retrieve reference genome of interest by genome name or genome assembly name - * @param name Name of Reference Genome or Genome Assembly - * @param species genetic species - * @throws DaoException Database Error. 
- */ - public static int getReferenceGenomeIdByName(String name, String species) throws DaoException { - try { - return genomeInternalIds.get(name); - } catch (java.lang.NullPointerException exp) { - if (species.equals(ReferenceGenome.HOMO_SAPIENS)) { - return genomeInternalIds.get(ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD); - } else if (species.equals(ReferenceGenome.MUS_MUSCULUS)) { - return genomeInternalIds.get(ReferenceGenome.MUS_MUSCULUS_DEFAULT_GENOME_BUILD); // NCBI_BUILD field was an optional in the past - } else { - throw new DaoException("Species not supproted yet"); - } + Integer referenceGenomeId = genomeInternalIds.get(name); + if (referenceGenomeId == null) { + return -1; + } else { + return referenceGenomeId; } } @@ -286,8 +249,8 @@ public static int getReferenceGenomeIdByName(String name, String species) throws */ private static ReferenceGenome extractReferenceGenome(ResultSet rs) throws SQLException { ReferenceGenome referenceGenome = new ReferenceGenome( - rs.getString("SPECIES"), rs.getString("NAME"), + rs.getString("SPECIES"), rs.getString("BUILD_NAME")); referenceGenome.setReferenceGenomeId(rs.getInt("REFERENCE_GENOME_ID")); referenceGenome.setGenomeSize(rs.getLong("GENOME_SIZE")); @@ -295,5 +258,4 @@ private static ReferenceGenome extractReferenceGenome(ResultSet rs) throws SQLEx referenceGenome.setUrl(rs.getString("URL")); return referenceGenome; } - } \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java index 36261a6bda5..8926ac45711 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenomeGene.java @@ -32,75 +32,52 @@ */ public class DaoReferenceGenomeGene { private static DaoReferenceGenomeGene instance = null; - - - protected DaoReferenceGenomeGene() { - //Exists only for default instantiation - } - 
+ public static DaoReferenceGenomeGene getInstance() { if (instance == null) { instance = new DaoReferenceGenomeGene(); } - return instance; + return instance; } - + /** - * Update Reference Genome Gene Record in the Database. + * Update Reference Genome Gene in the Database. * @param gene Reference Genome Gene */ - public int updateGene(ReferenceGenomeGene gene) throws DaoException { + public void updateGene(ReferenceGenomeGene gene) throws DaoException { Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; - boolean setBulkLoadAtEnd = false; try { MySQLbulkLoader.bulkLoadOff(); - int rows = 0; con = JdbcUtil.getDbConnection(DaoReferenceGenome.class); pstmt = con.prepareStatement - ("UPDATE reference_genome_gene SET `CHR`=?, `CYTOBAND`=?,`EXONIC_LENGTH`=?,`START`=?, `END`=? WHERE `ENTREZ_GENE_ID`=? AND `REFERENCE_GENOME_ID`=?"); - pstmt.setString(1, gene.getChr()); - pstmt.setString(2, gene.getCytoband()); - pstmt.setInt(3, gene.getExonicLength()); - pstmt.setLong(4, gene.getStart()); - pstmt.setLong(5, gene.getEnd()); - pstmt.setLong(6, gene.getEntrezGeneId()); - pstmt.setInt(7, gene.getReferenceGenomeId()); - - rows += pstmt.executeUpdate(); - if (rows != 1) { - ProgressMonitor.logWarning("No change for " + gene.getEntrezGeneId() + " " + gene.getReferenceGenomeId() + "? Code " + rows); - } - - return rows; + ("UPDATE reference_genome_gene SET `EXONIC_LENGTH`=?,`START`=?, `END`=? WHERE `ENTREZ_GENE_ID`=? AND `REFERENCE_GENOME_ID`=?"); + pstmt.setInt(1, gene.getExonicLength()); + pstmt.setLong(2, gene.getStart()); + pstmt.setLong(3, gene.getEnd()); + pstmt.setLong(4, gene.getEntrezGeneId()); + pstmt.setInt(5, gene.getReferenceGenomeId()); + pstmt.executeUpdate(); } catch (SQLException e) { throw new DaoException(e); } finally { MySQLbulkLoader.bulkLoadOn(); JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); } - } /** - * - * Adds a new reference genome gene Record to the Database or update the existing record. 
- * + * Adds a new reference genome gene to the Database or update the existing one. * @param gene Reference Genome Gene Object. - * @return number of records successfully added. * @throws DaoException Database Error. */ - public int addOrUpdateGene(ReferenceGenomeGene gene) throws DaoException { - + public void addOrUpdateGene(ReferenceGenomeGene gene) throws DaoException { Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; try { - int rows = 0; - ReferenceGenomeGene existingGene = null; - - existingGene = getGene(gene.getEntrezGeneId(), gene.getReferenceGenomeId()); + ReferenceGenomeGene existingGene = getGene(gene.getEntrezGeneId(), gene.getReferenceGenomeId()); if (existingGene == null) { //add gene, referring to this genetic entity @@ -115,12 +92,11 @@ public int addOrUpdateGene(ReferenceGenomeGene gene) throws DaoException { pstmt.setInt(5, gene.getExonicLength()); pstmt.setLong(6, gene.getStart()); pstmt.setLong(7, gene.getEnd()); - rows += pstmt.executeUpdate(); + pstmt.executeUpdate(); } else { - rows += updateGene(existingGene); + updateGene(existingGene); } - return rows; } catch (SQLException e) { throw new DaoException(e); } finally { @@ -130,11 +106,9 @@ public int addOrUpdateGene(ReferenceGenomeGene gene) throws DaoException { /** - * Gets the Gene with the Specified Entrez Gene ID. - * For faster access, consider using DaoGeneOptimized. - * + * Gets the Reference Genome Gene with the Specified Entrez Gene ID. * @param entrezGeneId ENTRZ GENE ID. - * @return Canonical Gene Object. + * @return Reference Genome Gene Object. * @throws DaoException Database Error. */ public ReferenceGenomeGene getGene(long entrezGeneId, int referenceGenomeId) throws DaoException { @@ -160,38 +134,6 @@ public ReferenceGenomeGene getGene(long entrezGeneId, int referenceGenomeId) thr } } - /** - * Gets the Gene with the Specified Entrez Gene ID. - * For faster access, consider using DaoGeneOptimized. - * - * @param hugoGeneSymbol Hugo Gene Symbol. 
- * @return Canonical Gene Object. - * @throws DaoException Database Error. - */ - public ReferenceGenomeGene getGene(String hugoGeneSymbol, int referenceGenomeId) throws DaoException { - Connection con = null; - PreparedStatement pstmt = null; - ResultSet rs = null; - try { - con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); - pstmt = con.prepareStatement - ("SELECT * FROM `reference_genome_gene` JOIN `gene` ON `reference_genome_gene`.entrez_gene_id=" + - "`gene`.entrez_gene_id WHERE `HUGO_GENE_SYMBOL` = ? AND `REFERENCE_GENOME_ID` = ?"); - pstmt.setString(1, hugoGeneSymbol); - pstmt.setInt(2, referenceGenomeId); - rs = pstmt.executeQuery(); - if (rs.next()) { - return extractGene(rs); - } else { - return null; - } - } catch (SQLException e) { - throw new DaoException(e); - } finally { - JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); - } - } - private ReferenceGenomeGene extractGene(ResultSet rs) throws SQLException, DaoException { int entrezGeneId = rs.getInt("ENTREZ_GENE_ID"); int reference_genome_id = rs.getInt("REFERENCE_GENOME_ID"); @@ -206,13 +148,11 @@ private ReferenceGenomeGene extractGene(ResultSet rs) throws SQLException, DaoEx } /** - * Deletes the Reference Genome Gene Record with Entrez Gene ID and Referece Genome ID in the Database. - * + * Deletes the Reference Genome Gene for a given Entrez Gene ID and Referece Genome ID. * @param entrezGeneId ENTREZ GENE ID * @param referenceGenomeId REFERENCE GENOME ID */ public void deleteGene(int entrezGeneId, int referenceGenomeId) throws DaoException { - Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; @@ -228,28 +168,4 @@ public void deleteGene(int entrezGeneId, int referenceGenomeId) throws DaoExcept JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); } } - - /** - * Deletes all Reference Genome Gene Records in the Database. - * @throws DaoException Database Error. - * - * @deprecated only used by deprecated code, so deprecating this as well. 
- */ - public void deleteAllRecords() throws DaoException { - Connection con = null; - PreparedStatement pstmt = null; - ResultSet rs = null; - try { - con = JdbcUtil.getDbConnection(DaoReferenceGenomeGene.class); - JdbcUtil.disableForeignKeyCheck(con); - pstmt = con.prepareStatement("TRUNCATE TABLE `reference_gnome_gene`"); - pstmt.executeUpdate(); - JdbcUtil.enableForeignKeyCheck(con); - } catch (SQLException e) { - throw new DaoException(e); - } finally { - JdbcUtil.closeAll(DaoGene.class, con, pstmt, rs); - } - } - } diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcDataSource.java b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcDataSource.java index 17f71a1b5ce..da80b97d205 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcDataSource.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcDataSource.java @@ -13,6 +13,7 @@ public JdbcDataSource () { String host = dbProperties.getDbHost(); String userName = dbProperties.getDbUser(); String password = dbProperties.getDbPassword(); + String mysqlDriverClassName = dbProperties.getDbDriverClassName(); String database = dbProperties.getDbName(); String useSSL = (!StringUtils.isBlank(dbProperties.getDbUseSSL())) ? dbProperties.getDbUseSSL() : "false"; String enablePooling = (!StringUtils.isBlank(dbProperties.getDbEnablePooling())) ? 
dbProperties.getDbEnablePooling(): "false"; @@ -20,7 +21,7 @@ public JdbcDataSource () { "?user=" + userName + "&password=" + password + "&zeroDateTimeBehavior=convertToNull&useSSL=" + useSSL; // Set up poolable data source - this.setDriverClassName("com.mysql.jdbc.Driver"); + this.setDriverClassName(mysqlDriverClassName); this.setUsername(userName); this.setPassword(password); this.setUrl(url); diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java index 67845e35ec8..817610ef258 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java @@ -56,7 +56,7 @@ public class JdbcUtil { */ public static DataSource getDataSource() { if (dataSource == null) { - dataSource = initDataSource(); + dataSource = new JdbcDataSource(); } return dataSource; } @@ -69,28 +69,6 @@ public static void setDataSource(DataSource value) { dataSource = value; } - private static DataSource initDataSource() { - DatabaseProperties dbProperties = DatabaseProperties.getInstance(); - String host = dbProperties.getDbHost(); - String userName = dbProperties.getDbUser(); - String password = dbProperties.getDbPassword(); - String database = dbProperties.getDbName(); - String url ="jdbc:mysql://" + host + "/" + database + - "?user=" + userName + "&password=" + password + - "&zeroDateTimeBehavior=convertToNull"; - // Set up poolable data source - BasicDataSource dataSource = new BasicDataSource(); - dataSource.setDriverClassName("com.mysql.jdbc.Driver"); - dataSource.setUsername(userName); - dataSource.setPassword(password); - dataSource.setUrl(url); - // By pooling/reusing PreparedStatements, we get a major performance gain - dataSource.setPoolPreparedStatements(true); - dataSource.setMaxTotal(100); - activeConnectionCount = new HashMap(); - return dataSource; - } - /** * Gets Connection to the Database. 
* diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java index 6b9ecf0c0cb..0d58cd09bab 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java +++ b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenome.java @@ -17,9 +17,9 @@ package org.mskcc.cbio.portal.model; - import java.math.BigInteger; - import java.util.Date; - import org.mskcc.cbio.portal.util.*; +import java.math.BigInteger; +import java.util.Date; +import org.mskcc.cbio.portal.util.*; /** * This represents the reference genome used by molecular profiling diff --git a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java index 5370f0429c8..dc33874a570 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java +++ b/core/src/main/java/org/mskcc/cbio/portal/model/ReferenceGenomeGene.java @@ -14,10 +14,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ - package org.mskcc.cbio.portal.model; - - /** * Class to wrap Reference Genome Gene. 
* @author Kelsey Zhu @@ -63,15 +60,21 @@ public ReferenceGenomeGene(long entrezGeneId, int referenceGenomeId, String chr, } - public void setReferenceGenomeId(int referenceGenomeId) { this.referenceGenomeId = referenceGenomeId; } + public void setReferenceGenomeId(int referenceGenomeId) { + this.referenceGenomeId = referenceGenomeId; + } public int getReferenceGenomeId() { return referenceGenomeId; } - public void setEntrezGeneId(long entrezGeneId) { this.entrezGeneId = entrezGeneId; } + public void setEntrezGeneId(long entrezGeneId) { + this.entrezGeneId = entrezGeneId; + } - public long getEntrezGeneId() { return entrezGeneId; } + public long getEntrezGeneId() { + return entrezGeneId; + } public String getChr() { return chr; @@ -96,13 +99,21 @@ public void setExonicLength(int exonicLength) { this.exonicLength = exonicLength; } - public long getStart() { return this.start; } + public long getStart() { + return this.start; + } - public void setStart(long start) { this.start = start; } + public void setStart(long start) { + this.start = start; + } - public long getEnd() { return this.end = end; } + public long getEnd() { + return this.end = end; + } - public void setEnd(long end) { this.end = end; } + public void setEnd(long end) { + this.end = end; + } @Override public boolean equals(Object obj0) { @@ -116,8 +127,7 @@ public boolean equals(Object obj0) { } return false; } - - + @Override public int hashCode() { int result = 2; @@ -125,5 +135,4 @@ public int hashCode() { result = 31 * result + this.referenceGenomeId; return result; } - } \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java index 592790939e1..b3bd23f70ab 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java @@ -80,7 +80,7 @@ public void importData() throws 
IOException, DaoException { // if (gene.contains("_HUMAN")) { // gene = gene.substring(0,gene.indexOf("_HUMAN")); // } - CanonicalGene canonicalGene = daoGeneOptimized.getNonAmbiguousGene(gene, null); + CanonicalGene canonicalGene = daoGeneOptimized.getNonAmbiguousGene(gene, true); if (canonicalGene==null) { System.err.println("Gene symbol in COSMIC not recognized: "+gene); continue; diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java index eb8eea32c56..dc1f63b5378 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java @@ -137,26 +137,28 @@ public void importData() throws IOException, DaoException { } GeneticProfile geneticProfile = DaoGeneticProfile.getGeneticProfileById(geneticProfileId); + + CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(geneticProfile.getCancerStudyId()); + String genomeBuildName; + String referenceGenome = cancerStudy.getReferenceGenome(); + if (referenceGenome == null) { + genomeBuildName = GlobalProperties.getReferenceGenomeName(); + } else { + genomeBuildName = DaoReferenceGenome.getReferenceGenomeByGenomeName(referenceGenome).getBuildName(); + } + while((line=buf.readLine()) != null) { ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); - + if( !line.startsWith("#") && line.trim().length() > 0) { String[] parts = line.split("\t", -1 ); // the -1 keeps trailing empty strings; see JavaDoc for String MafRecord record = mafUtil.parseRecord(line); - CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(geneticProfile.getCancerStudyId()); - String genomeBuildName; - try { - String referenceGenome = cancerStudy.getReferenceGenome(); - genomeBuildName = DaoReferenceGenome.getReferenceGenomeByGenomeName(referenceGenome).getBuildName(); - } catch 
(NullPointerException e) { - genomeBuildName = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD; - } if (!record.getNcbiBuild().equalsIgnoreCase(genomeBuildName)) { - ProgressMonitor.setCurrentMessage("Genome Build Name does not match, expecting " + genomeBuildName); + ProgressMonitor.logWarning("Genome Build Name does not match, expecting " + genomeBuildName); } // process case id String barCode = record.getTumorSampleID(); @@ -310,7 +312,7 @@ public void importData() throws IOException, DaoException { if (gene == null && !(geneSymbol.equals("") || geneSymbol.equals("Unknown"))) { - gene = daoGene.getNonAmbiguousGene(geneSymbol, chr); + gene = daoGene.getNonAmbiguousGene(geneSymbol, true); } // assume symbol=Unknown and entrez=0 (or missing Entrez column) to imply an diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportFusionData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportFusionData.java index 4c4f4bb8c0c..632ed011c2b 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportFusionData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportFusionData.java @@ -119,7 +119,7 @@ public void importData() throws IOException, DaoException { } if (gene == null) { // If Entrez Gene ID Fails, try Symbol. 
- gene = daoGene.getNonAmbiguousGene(geneSymbol, null); + gene = daoGene.getNonAmbiguousGene(geneSymbol, true); } if(gene == null) { ProgressMonitor.logWarning("Gene not found: " + geneSymbol + " [" diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java index 9f2ce530b8a..609f15c09fe 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java @@ -58,9 +58,11 @@ public class ImportGeneData extends ConsoleRunnable { * @throws IOException * @throws DaoException */ - public static void importData(File geneFile) throws IOException, DaoException { + public static void importData(File geneFile, String genomeBuild) throws IOException, DaoException { Map> genesWithSymbolFromNomenClatureAuthority = new LinkedHashMap<>(); Map> genesWithoutSymbolFromNomenClatureAuthority = new LinkedHashMap<>(); + Map> refGenesWithSymbolFromNomenClatureAuthority = new LinkedHashMap<>(); + Map> refGenesWithoutSymbolFromNomenClatureAuthority = new LinkedHashMap<>(); try (FileReader reader = new FileReader(geneFile)) { BufferedReader buf = new BufferedReader(reader); String line; @@ -94,6 +96,8 @@ public static void importData(File geneFile) throws IOException, DaoException { String strAliases = parts[4]; String strXrefs = parts[5]; String cytoband = parts[7]; + String chr = cytoband.split("p|q")[0]; + int referenceGenomeId = DaoReferenceGenome.getReferenceGenomeByBuildName(genomeBuild).getReferenceGenomeId(); String desc = parts[8]; String type = parts[9]; String mainSymbol = parts[10]; // use 10 instead of 2 since column 2 may have duplication @@ -112,24 +116,41 @@ public static void importData(File geneFile) throws IOException, DaoException { } CanonicalGene gene = null; + ReferenceGenomeGene refGene = null; if (!mainSymbol.equals("-")) { //Try the main symbol: gene = new CanonicalGene(entrezGeneId, 
mainSymbol, aliases); + refGene = new ReferenceGenomeGene(entrezGeneId, referenceGenomeId, chr, cytoband, 0, 0, 0); Set genes = genesWithSymbolFromNomenClatureAuthority.get(mainSymbol); + Set refGenes = refGenesWithSymbolFromNomenClatureAuthority.get(mainSymbol); + if (genes==null) { genes = new HashSet(); genesWithSymbolFromNomenClatureAuthority.put(mainSymbol, genes); } + if (refGenes==null) { + refGenes = new HashSet(); + refGenesWithSymbolFromNomenClatureAuthority.put(mainSymbol, refGenes); + } genes.add(gene); + refGenes.add(refGene); } else if (!geneSymbol.equals("-")) { //there is no main symbol, so import using the temporary/unofficial(?) symbol: gene = new CanonicalGene(entrezGeneId, geneSymbol, aliases); + refGene = new ReferenceGenomeGene(entrezGeneId, referenceGenomeId, chr, + cytoband, 0, 0, 0); Set genes = genesWithoutSymbolFromNomenClatureAuthority.get(geneSymbol); if (genes==null) { genes = new HashSet(); genesWithoutSymbolFromNomenClatureAuthority.put(geneSymbol, genes); } genes.add(gene); + Set refGenes = refGenesWithoutSymbolFromNomenClatureAuthority.get(geneSymbol); + if (refGenes==null) { + refGenes = new HashSet(); + refGenesWithoutSymbolFromNomenClatureAuthority.put(geneSymbol, refGenes); + } + refGenes.add(refGene); } if (gene!=null) { @@ -137,6 +158,8 @@ public static void importData(File geneFile) throws IOException, DaoException { } } addGenesToDB(genesWithSymbolFromNomenClatureAuthority, genesWithoutSymbolFromNomenClatureAuthority); + addReferenceGenomeGenesToDB(refGenesWithSymbolFromNomenClatureAuthority, + refGenesWithoutSymbolFromNomenClatureAuthority); } } @@ -153,7 +176,7 @@ private static void addGenesToDB(Map> genesWithSymbol Map> genesWithoutSymbolFromNomenClatureAuthority) throws DaoException { DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); - int nrExisting = 0; + int nrExisting = 0; // Add genes with symbol from nomenclature authority for (Map.Entry> entry : genesWithSymbolFromNomenClatureAuthority.entrySet()) { Set 
genes = entry.getValue(); @@ -216,6 +239,89 @@ private static void addGenesToDB(Map> genesWithSymbol ProgressMonitor.logWarning("Number of records skipped because the gene was already in the gene table (updates are not allowed - see specific warnings above): " + nrExisting); } } + /** + * Iterate over the reference genome genes found in the given maps and try to add them to the DB. + * + * @param refGenesWithSymbolFromNomenClatureAuthority: reference genome genes with official symbol + * @param refGenesWithoutSymbolFromNomenClatureAuthority: reference genome genes without official symbol (can happen, some entrez genes + * have no official symbol yet, but a temporary symbol). + * + * @throws DaoException + */ + private static void addReferenceGenomeGenesToDB(Map> refGenesWithSymbolFromNomenClatureAuthority, + Map> refGenesWithoutSymbolFromNomenClatureAuthority) throws DaoException { + + DaoReferenceGenomeGene daoReferenceGenomeGene = DaoReferenceGenomeGene.getInstance(); + + int nrExisting = 0; + // Add genes with symbol from nomenclature authority + for (Map.Entry> entry : refGenesWithSymbolFromNomenClatureAuthority.entrySet()) { + Set refGenes = entry.getValue(); + if (refGenes.size()==1) { + ReferenceGenomeGene refGene = refGenes.iterator().next(); + // first check if gene exists. If exists, give warning and skip record: + if (daoReferenceGenomeGene.getGene(refGene.getEntrezGeneId(), refGene.getReferenceGenomeId()) != null) { + ProgressMonitor.logWarning("Reference Genome Gene is already in table. Updates are not allowed. 
If you need to update the `gene` table, please remove all studies first and empty the `gene` table."); + nrExisting++; + continue; + } + try { + daoReferenceGenomeGene.addOrUpdateGene(refGene); + } catch (Exception e) { + ProgressMonitor.logWarning("chromosome too large" + refGene.getChr()); + continue; + } + ProgressMonitor.logWarning("New reference genome gene with official symbol added"); + } else { + //TODO - is unexpected for official symbols...raise Exception instead? + logDuplicateReferenceGenomeGeneSymbolWarning(entry.getKey(), refGenes, true); + } + } + + // Add genes without symbol from nomenclature authority + if (refGenesWithoutSymbolFromNomenClatureAuthority.keySet().size() > 0) { + int nrImported = 0; + int nrSkipped = 0; + for (Map.Entry> entry : refGenesWithoutSymbolFromNomenClatureAuthority.entrySet()) { + Set refGenes = entry.getValue(); + String symbol = entry.getKey(); + if (refGenes.size()==1) { + ReferenceGenomeGene refGene = refGenes.iterator().next(); + //only add the gene if it does not conflict with an official symbol: + if (!refGenesWithSymbolFromNomenClatureAuthority.containsKey(symbol)) { + //first check if gene exists. If exists, give warning and skip record since we don't allow updates in this process: + if (daoReferenceGenomeGene.getGene(refGene.getEntrezGeneId(), refGene.getReferenceGenomeId()) != null) { + ProgressMonitor.logWarning("Gene is already in table. Updates are not allowed. 
If you need to update the `gene` table, please remove all studies first and empty the `gene` table."); + nrSkipped++; + nrExisting++; + continue; + } + daoReferenceGenomeGene.addOrUpdateGene(refGene); + ProgressMonitor.logWarning("New gene with *no* official symbol added"); + nrImported++; + } else { + // ignore entries with a symbol that have the same value as stardard one + ProgressMonitor.logWarning("Ignored line with entrez gene id "+refGene.getEntrezGeneId() + " because its 'unnoficial' symbol " + + symbol+" is already an 'official symbol' of another gene"); + nrSkipped++; + continue; + } + } else { + logDuplicateReferenceGenomeGeneSymbolWarning(entry.getKey(), refGenes, true); + nrSkipped++; + continue; + } + } + ProgressMonitor.logWarning("There were " +refGenesWithoutSymbolFromNomenClatureAuthority.keySet().size() + + " genes names in this file without an official symbol from nomenclature authority. Imported: " + nrImported + + ". Gene names skipped (because of duplicate symbol entry or because symbol is an 'official symbol' of another gene): " + nrSkipped); + } + + if (nrExisting > 0) { + ProgressMonitor.logWarning("Number of records skipped because the gene was already in the gene table (updates are not allowed - see specific warnings above): " + nrExisting); + } + } + private static void logDuplicateGeneSymbolWarning(String symbol, Set genes, boolean isOfficialSymbol) { StringBuilder sb = new StringBuilder(); @@ -233,6 +339,23 @@ private static void logDuplicateGeneSymbolWarning(String symbol, Set refGenes, boolean isOfficialSymbol) { + StringBuilder sb = new StringBuilder(); + if (isOfficialSymbol) { + sb.append("More than 1 gene has the same (official) symbol "); + } else { + sb.append("More than 1 gene has the same (unofficial) symbol "); + } + sb.append(symbol) + .append(":"); + for (ReferenceGenomeGene refGene : refGenes) { + sb.append(" ") + .append(refGene.getEntrezGeneId()) + .append(". 
Ignore..."); + } + ProgressMonitor.logWarning(sb.toString()); + } private static String getCytoband(String cytoband, String species) { if (!cytoband.equals("-")) { @@ -279,7 +402,7 @@ public static void importGeneLength(File geneFile, String genomeBuild, String sp //Set the variables needed for the method FileReader reader = new FileReader(geneFile); BufferedReader buf = new BufferedReader(reader); - int referenceGenomeId = DaoReferenceGenome.getReferenceGenomeIdByName(genomeBuild, species); + int referenceGenomeId = DaoReferenceGenome.getReferenceGenomeByBuildName(genomeBuild).getReferenceGenomeId(); String line; ProgressMonitor.setCurrentMessage("\nUpdating gene lengths... \n"); //Display a message in the console boolean geneUpdated = false; @@ -292,7 +415,6 @@ public static void importGeneLength(File geneFile, String genomeBuild, String sp String currentChrom = ""; Long currentStart; Long currentStop; - String cytoband = ""; String parts[] = null; List loci = new ArrayList(); @@ -305,7 +427,6 @@ public static void importGeneLength(File geneFile, String genomeBuild, String sp } parts = line.split("\t"); currentChrom = parts[0]; - cytoband = getCytoband(parts[7], species); currentStart = Long.parseLong(parts[3]); currentStop = Long.parseLong(parts[4]) + 1; // We have to add 1 here, because the last base is also included. @@ -341,14 +462,14 @@ else if (i.contains("gene_name")) { } /// If there is a switch else { - geneUpdated = updateLength(previousSymbol, previousChrom, loci, - referenceGenomeId, hasGenes, cytoband); + geneUpdated = updateLength(previousSymbol, previousChrom, loci, + referenceGenomeId); if (geneUpdated) { - nrGenesUpdated++; + nrGenesUpdated++; } /// At the end of writing a new gene, clear the loci and save the new ensemblID. 
loci.clear(); - + previousEnsembl = currentEnsembl; previousSymbol = currentSymbol; previousChrom = currentChrom; @@ -357,14 +478,12 @@ else if (i.contains("gene_name")) { } } } - /// Write the last gene /// First check if the gene exists in the database - geneUpdated = updateLength(previousSymbol, previousChrom, loci, referenceGenomeId, hasGenes, cytoband); + geneUpdated = updateLength(previousSymbol, previousChrom, loci, referenceGenomeId); if (geneUpdated) { - nrGenesUpdated++; + nrGenesUpdated++; } - ProgressMonitor.setCurrentMessage("Updated length info for " + nrGenesUpdated + " genes\n"); buf.close(); @@ -382,53 +501,36 @@ else if (i.contains("gene_name")) { * @throws IOException * @throws DaoException */ - public static boolean updateLength(String symbol, String chromosome, List loci, int refreneceGenomeId, - boolean hasGenes, String cytoband) throws IOException, DaoException { + public static boolean updateLength(String symbol, String chromosome, List loci, int refreneceGenomeId) throws IOException, DaoException { DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); + CanonicalGene gene = daoGeneOptimized.getNonAmbiguousGene(symbol); + System.out.println(" --> update reference genome gene: " + gene.getHugoGeneSymbolAllCaps()); + DaoReferenceGenomeGene daoReferenceGenomeGene = DaoReferenceGenomeGene.getInstance(); boolean lengthUpdated = false; /// Check if the gene is in the database - CanonicalGene gene = daoGeneOptimized.getNonAmbiguousGene(symbol, chromosome, false); //Identify unambiguously the gene (with the symbol and the chromosome) - DaoReferenceGenomeGene daoReferenceGenomeGene = DaoReferenceGenomeGene.getInstance(); - ReferenceGenomeGene refGene = DaoReferenceGenomeGene.getInstance().getGene(gene.getEntrezGeneId(), refreneceGenomeId); + ReferenceGenomeGene refGene = daoReferenceGenomeGene.getGene(gene.getEntrezGeneId(), refreneceGenomeId); /// If it's not in the database, don't add it - if (!(gene==null)) { + if (refGene != null) { 
/// Calc length long[] exonic = calculateGeneLength(loci); - /// If there is no cytoband in the database, just write it (can also be an overwrite) - if (cytoband == null) { - if (hasGenes) { - daoGeneOptimized.updateGene(gene); - lengthUpdated = true; - } - } - /// If there is a cytoband in database, check if cytoband-chr matches input-chr + String cbChr = "chr"+refGene.getCytoband(); + System.out.println(" updateLength--> cytoband of the gene: " + cbChr); + if (cbChr.equals(chromosome)) { //Update the length only if the chromosome matches + // update reference genome gene + System.out.println(" updateLength--> update reference genome gene: " + gene.getHugoGeneSymbolAllCaps()); + refGene.setExonicLength((int) exonic[2]); + refGene.setStart(exonic[0]); + refGene.setEnd(exonic[1]); + daoReferenceGenomeGene.updateGene(refGene); + System.out.println(" updateLength--> exonic length of the gene: " + + daoReferenceGenomeGene.getGene(gene.getEntrezGeneId(), refreneceGenomeId).getExonicLength()); + lengthUpdated = true; + } else { - String cbChr = "chr"+cytoband.split("p|q")[0]; - if (cbChr.equals(chromosome)) { //Update the length only if the chromosome matches - if (hasGenes) { - //gene.setLength((int) exonic[2]); - daoGeneOptimized.updateGene(gene); - } - - // update reference genome gene - if (refGene == null) { - refGene = new ReferenceGenomeGene(gene.getGeneticEntityId(), refreneceGenomeId); - } - refGene.setEntrezGeneId(gene.getEntrezGeneId()); - refGene.setChr(chromosome.replace("chr", "")); - refGene.setCytoband(cytoband); - refGene.setExonicLength((int) exonic[2]); - refGene.setStart(exonic[0]); - refGene.setEnd(exonic[1]); - daoReferenceGenomeGene.addOrUpdateGene(refGene); - lengthUpdated = true; - } - else { - ProgressMonitor.logWarning("Cytoband does not match, gene not saved (likely another version of gene in gtf has correct chr and is saved)"); - } - } + ProgressMonitor.logWarning("Cytoband does not match, gene not saved (likely another version of gene in gtf 
has correct chr and is saved)"); + } } return lengthUpdated; } @@ -525,17 +627,17 @@ public void run() { File geneFile; int numLines; - if(options.has("genes")) { - geneFile = new File((String) options.valueOf("genes")); - - System.out.println("Reading gene data from: " + geneFile.getAbsolutePath()); - numLines = FileUtil.getNumLines(geneFile); - System.out.println(" --> total number of lines: " + numLines); - ProgressMonitor.setMaxValue(numLines); - MySQLbulkLoader.bulkLoadOn(); - ImportGeneData.importData(geneFile); - MySQLbulkLoader.flushAll(); //Gene and gene_alias should be updated before calculating gene length (gtf)! - } + if(options.has("genes")) { + geneFile = new File((String) options.valueOf("genes")); + + System.out.println("Reading gene data from: " + geneFile.getAbsolutePath()); + numLines = FileUtil.getNumLines(geneFile); + System.out.println(" --> total number of lines: " + numLines); + ProgressMonitor.setMaxValue(numLines); + MySQLbulkLoader.bulkLoadOn(); + ImportGeneData.importData(geneFile, (String)options.valueOf("genome-build")); + MySQLbulkLoader.flushAll(); //Gene and gene_alias should be updated before calculating gene length (gtf)! 
+ } if(options.has("supp-genes")) { File suppGeneFile = new File((String) options.valueOf("genes")); diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportHprd.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportHprd.java index 980a7b6dab1..e3c968dbbb5 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportHprd.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportHprd.java @@ -83,8 +83,8 @@ public void importData() throws IOException, DaoException { String expTypes = parts[6]; String pmids = parts[7]; - CanonicalGene geneA = daoGene.getNonAmbiguousGene(geneAId, null); - CanonicalGene geneB = daoGene.getNonAmbiguousGene(geneBId, null); + CanonicalGene geneA = daoGene.getNonAmbiguousGene(geneAId, true); + CanonicalGene geneB = daoGene.getNonAmbiguousGene(geneBId, true); // Log genes that we cannot identify. if (geneA == null) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportPathwayCommonsExtSif.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportPathwayCommonsExtSif.java index 8836d720f16..ee21a957419 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportPathwayCommonsExtSif.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportPathwayCommonsExtSif.java @@ -82,10 +82,10 @@ public void importData() throws IOException, DaoException { String geneAId = parts[0]; - CanonicalGene geneA = daoGene.getNonAmbiguousGene(geneAId, null); + CanonicalGene geneA = daoGene.getNonAmbiguousGene(geneAId); if (geneA != null) { String geneBId = parts[2]; - CanonicalGene geneB = daoGene.getNonAmbiguousGene(geneBId, null); + CanonicalGene geneB = daoGene.getNonAmbiguousGene(geneBId, true); if (geneB != null) { String interactionType = parts[1]; diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProteinArrayData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProteinArrayData.java index 4e2942beaf1..b8d3c637757 100644 --- 
a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProteinArrayData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProteinArrayData.java @@ -162,7 +162,7 @@ private String importArrayInfo(String info) throws DaoException { StringUtils.join(genes, "/"), residue, null); daoPAI.addProteinArrayInfo(pai); for (String symbol : genes) { - CanonicalGene gene = daoGene.getNonAmbiguousGene(symbol, null); + CanonicalGene gene = daoGene.getNonAmbiguousGene(symbol, true); if (gene==null) { System.err.println(symbol+" not exist"); continue; diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java index 417c3b22e6b..1230608a166 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportReferenceGenome.java @@ -39,7 +39,6 @@ public class ImportReferenceGenome extends ConsoleRunnable { /** * Adds the genes parsed from the file into the Database. - * * @param referenceGenomeFile File with reference genome information * @throws IOException * @throws DaoException @@ -82,24 +81,21 @@ public static void importData(File referenceGenomeFile) throws IOException, DaoE } addReferenceGenomesToDB(referenceGenomes); } - } /** * Iterate over the genes found in the given maps and try to add them to the DB. 
- * * @param referenceGenomes: reference genomes * @throws DaoException */ private static void addReferenceGenomesToDB(Set referenceGenomes) throws DaoException { - - int nrExisting = 0; for (ReferenceGenome refGenome: referenceGenomes) { if (DaoReferenceGenome.getReferenceGenomeByInternalId(refGenome.getReferenceGenomeId()) != null) { ProgressMonitor.logWarning("Reference genome updated"); - int rows = DaoReferenceGenome.updateReferenceGenome(refGenome); - if (rows != 1) { + try { + DaoReferenceGenome.updateReferenceGenome(refGenome); + } catch (DaoException e) { ProgressMonitor.logWarning("No change for " + refGenome.getGenomeName()); } } else { @@ -108,8 +104,7 @@ private static void addReferenceGenomesToDB(Set referenceGenome } } } - - + @Override public void run() { try { @@ -164,7 +159,6 @@ public void run() { /** * Makes an instance to run with the given command line arguments. - * * @param args the command line arguments to be used */ public ImportReferenceGenome(String[] args) { @@ -173,7 +167,6 @@ public ImportReferenceGenome(String[] args) { /** * Runs the command as a script and exits with an appropriate exit code. 
- * * @param args the arguments given on the command line */ public static void main(String[] args) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportSif.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportSif.java index e8f30f4b779..c13e958cd59 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportSif.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportSif.java @@ -86,8 +86,8 @@ public void importData() throws IOException, DaoException { String interactionType = parts[1]; String geneBId = parts[2]; - CanonicalGene geneA = daoGeneOptimized.getNonAmbiguousGene(geneAId, null); - CanonicalGene geneB = daoGeneOptimized.getNonAmbiguousGene(geneBId, null); + CanonicalGene geneA = daoGeneOptimized.getNonAmbiguousGene(geneAId, true); + CanonicalGene geneB = daoGeneOptimized.getNonAmbiguousGene(geneBId, true); // Log genes that we cannot identify. if (geneA == null) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java index 79229f451bd..b7a62aac296 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java @@ -779,7 +779,7 @@ private List parseRPPAGenes(String antibodyWithGene) throws DaoEx ProgressMonitor.logWarning("Gene " + symbol + " will be interpreted as 'Not Available' in this case. 
Record will be skipped for this gene."); } else { - CanonicalGene gene = daoGene.getNonAmbiguousGene(symbol, null); + CanonicalGene gene = daoGene.getNonAmbiguousGene(symbol, true); if (gene!=null) { genes.add(gene); } diff --git a/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java b/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java index f33402a9b07..dec63a4f6d0 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java +++ b/core/src/main/java/org/mskcc/cbio/portal/servlet/GetCoExpressionJSON.java @@ -241,7 +241,7 @@ protected void doPost(HttpServletRequest httpServletRequest, double spearman = spearmansCorrelation.correlation(new_query_gene_exp, new_compared_gene_exp); CanonicalGene comparedGene = daoGeneOptimized.getGene(compared_gene_id); fullResutlStr.append( - comparedGene.getHugoGeneSymbolAllCaps() + "\t" + "\t" + + comparedGene.getHugoGeneSymbolAllCaps() + "\t" + (double) Math.round(pearson * 100) / 100 + "\t" + (double) Math.round(spearman * 100) / 100 + "\n" ); diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java b/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java index d48de86deff..5497ee25efb 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java +++ b/core/src/main/java/org/mskcc/cbio/portal/util/CancerStudyReader.java @@ -34,6 +34,7 @@ import org.mskcc.cbio.portal.dao.DaoCancerStudy; import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoReferenceGenome; import org.mskcc.cbio.portal.model.CancerStudy; import org.mskcc.cbio.portal.model.ReferenceGenome; import org.mskcc.cbio.portal.scripts.TrimmedProperties; @@ -72,6 +73,20 @@ public static CancerStudy loadCancerStudy(File file, boolean strict, boolean add return cancerStudy; } + private static Boolean checkSpecies(String studyId, String genomeName) { + if (genomeName == null || genomeName == "") { + return true; + } + try { 
+ CancerStudy oldCancerStudy = DaoCancerStudy.getCancerStudyByStableId(studyId); + ReferenceGenome referenceGenome = DaoReferenceGenome.getReferenceGenomeByGenomeName( + oldCancerStudy.getReferenceGenome()); + return referenceGenome.getGenomeName().equalsIgnoreCase(genomeName); + } catch (DaoException | NullPointerException e) { + return true; + } + } + private static CancerStudy getCancerStudy(TrimmedProperties properties) { String cancerStudyIdentifier = properties.getProperty("cancer_study_identifier"); @@ -99,6 +114,7 @@ private static CancerStudy getCancerStudy(TrimmedProperties properties) throw new IllegalArgumentException("short_name is not specified."); } + CancerStudy cancerStudy = new CancerStudy(name, description, cancerStudyIdentifier, typeOfCancer, publicStudy(properties)); cancerStudy.setPmid(properties.getProperty("pmid")); @@ -106,11 +122,14 @@ private static CancerStudy getCancerStudy(TrimmedProperties properties) cancerStudy.setGroupsInUpperCase(properties.getProperty("groups")); cancerStudy.setShortName(shortName); String referenceGenome = properties.getProperty("reference_genome"); + if (referenceGenome == null) { - referenceGenome = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + referenceGenome = GlobalProperties.getReferenceGenomeName(); + } + if (!checkSpecies(cancerStudyIdentifier, referenceGenome)) { + throw new IllegalArgumentException("Species not match with old study"); } cancerStudy.setReferenceGenome(referenceGenome); - return cancerStudy; } diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java b/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java index 418e9a22ba0..cf81e820f5f 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java +++ b/core/src/main/java/org/mskcc/cbio/portal/util/EnrichmentsAnalysisUtil.java @@ -660,7 +660,7 @@ private String getCytoband(int geneticEntityId, String geneticProfileStableId) { try { genomeName = 
DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId).getReferenceGenome(); } catch (NullPointerException ne) { - genomeName = ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_NAME; + genomeName = GlobalProperties.getReferenceGenomeName(); } int genomeId = DaoReferenceGenome.getReferenceGenomeByGenomeName(genomeName).getReferenceGenomeId(); return DaoReferenceGenomeGene.getInstance().getGene(geneticEntityId, genomeId).getCytoband(); diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/GlobalProperties.java b/core/src/main/java/org/mskcc/cbio/portal/util/GlobalProperties.java index f7b459d4a8f..cfe5730212e 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/util/GlobalProperties.java +++ b/core/src/main/java/org/mskcc/cbio/portal/util/GlobalProperties.java @@ -1161,7 +1161,7 @@ public static String getQuerySetsOfGenes() { String fileName = portalProperties.getProperty(SETSOFGENES_LOCATION, null); return readFile(fileName); } - + public static String getMskWholeSlideViewerToken() { // this token is for the msk portal @@ -1185,4 +1185,8 @@ public static Boolean getRevokeOtherTokens() { public static String getDatMethod() { return datMethod; } -} + + public static String getReferenceGenomeName() { + return portalProperties.getProperty(UCSC_BUILD, DEFAULT_UCSC_BUILD); + } +} \ No newline at end of file diff --git a/core/src/main/scripts/importer/cbio_importer.py b/core/src/main/scripts/importer/cbio_importer.py deleted file mode 100644 index 1d805b52400..00000000000 --- a/core/src/main/scripts/importer/cbio_importer.py +++ /dev/null @@ -1,185 +0,0 @@ -import sys -import logging -import argparse -from sqlalchemy import create_engine -from sqlalchemy import exc -import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -import cbioportalImporter -import validateData - -def send_mail(physician, patient, sample): - me = "cbioportal@uhnresearch.ca" - physician = "kzhu@uhnresearch.ca" - - # Create message container - the 
correct MIME type is multipart/alternative. - msg = MIMEMultipart('alternative') - msg['Subject'] = "Link" - msg['From'] = me - msg['To'] = physician - - #http://localhost:8081/cbioportal/case.do#/patient?studyId=OCTANE&sampleId=OCT-01-0001_Tumour - # Create the body of the message (a plain-text and an HTML version). - text = "Dear Dr. %s!\nnew sample is available for your patient %s\n"\ - "Here is the link to view new sample:\n"\ - "http://localhost:8081/cbioportal/case.do#/patient?studyId=OCTANE&sampleId=%s"%(physician, patient, sample) - html = """\ - - New Sample Availabe for your Review - -

Dear Dr. %s
- new sample is available for your patient %s
- Here is the link - to view new sample. -

- - - """%(physician, patient, sample) - - # Record the MIME types of both parts - text/plain and text/html. - part1 = MIMEText(text, 'plain') - part2 = MIMEText(html, 'html') - - # Attach parts into message container. - # According to RFC 2046, the last part of a multipart message, in this case - # the HTML message, is best and preferred. - msg.attach(part1) - msg.attach(part2) - - # Send the message via local SMTP server. - s = smtplib.SMTP('smtp.uhnresearch.ca') - # sendmail function takes 3 arguments: sender's address, recipient's address - # and message to send - here it is sent as one string. - s.sendmail(me, physician, msg.as_string()) - s.quit() - -def get_options(): - parser = argparse.ArgumentParser(description='cBioPortal Importer') - parser.add_argument('-u', '--url_server', - type=str, - default='http://localhost/cbioportal', - help='URL to cBioPortal server. You can ' - 'set this if your URL is not ' - 'http://localhost/cbioportal') - parser.add_argument('-html', '--html_table', type=str, required=False, - help='path to html report output file') - parser.add_argument('-s', '--study_directory', type=str, required=False, - help='path to directory.') - parser.add_argument('-r', '--relaxed_clinical_definitions', required=False, - action='store_true', default=False, - help='Option to enable relaxed mode for validator when ' - 'validating clinical data without header definitions') - parser.add_argument('-m', '--strict_maf_checks', required=False, - action='store_true', default=False, - help='Option to enable strict mode for validator when ' - 'validating mutation data') - parser.add_argument('-n', '--no_portal_checks', default=False, - action='store_true', - help='Skip tests requiring information ' - 'from the cBioPortal installation') - parser.add_argument('-P', '--portal_properties', type=str, - help='portal.properties file path (default: assumed hg19)', - required=False) - parser.add_argument('-jar', '--jar_path', type=str, required=False, - help='Path 
to scripts JAR file (default: $PORTAL_HOME/scripts/target/scripts-*.jar)') - parser.add_argument('-c', '--cancer_study', type=str, required=True, - help='Cancer study identifier') - parser.add_argument('-o', '--override_warning', action='store_true', - help='override warnings and continue importing') - parser.add_argument('-v', '--verbose', required=False, action='store_true', - help='report status info messages in addition ') - parser = parser.parse_args() - return parser - -def get_sample_info(connection): - try: - sql_str = """ - select p.stable_id as patient_id, s.STABLE_ID as sample_id, cp.ATTR_VALUE as physician - from patient p - join sample s on s.PATIENT_ID = p.INTERNAL_ID - join clinical_patient cp on cp.INTERNAL_ID = p.INTERNAL_ID - where p. CANCER_STUDY_ID = %s - and cp.ATTR_ID = 'TREATING_PHYSICIAN' - """%(cancer_study_id) - return connection.execute(sql_str) - except: - raise - -def get_study_id(cancer_study_identifier): - try: - sql_str = """ - select cancer_study_id from cancer_study where CANCER_STUDY_IDENTIFIER = '%s' - """%cancer_study_identifier - result = connection.execute(sql_str) - for row in result: - return row['cancer_study_id'] - except: - raise - -def get_db_connection(): - try: - # mysql-python - engine = create_engine('mysql+mysqldb://cbio_user:cbi0pass@localhost/cgds') - return engine.connect() - except exc.SQLAlchemyError: - raise - -def get_logger(): - logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - return logging.getLogger('cbio_importer') - -def get_sample_dic(sample_list): - res_dic = {} - for row in sample_list: - patient_id = row['patient_id'] - sample_id = row['sample_id'] - if patient_id not in res_dic.keys(): - res_dic[patient_id] = [sample_id] - res_dic['physician'] = row['physician'] - else: - res_dic[patient_id].append(sample_id) - return res_dic - -if __name__ == '__main__': - # Parse user input - args = get_options() - - - connection = get_db_connection() - 
logger = get_logger() - - logger.info(args) - - cancer_study_id = get_study_id(args.cancer_study) - logger.info("Cancer Study ID: %s"%cancer_study_id) - - try: - logger.info("check exisiting samples...") - sample_list_old = get_sample_info(connection) - - old_samples = get_sample_dic(sample_list_old) - logger.info(old_samples) - - # Import study - # exit_code = validateData.main_validate(args) - # if not exit_code in [1,2]: - # cbioportalImporter.main(args) - # else: - # logger.error('Validation of study {status}.'.format( - # status={0: 'succeeded', - # 1: 'failed', - # 2: 'not performed as problems occurred', - # 3: 'succeeded with warnings'}.get(exit_code, 'unknown'))) - # system.exit(exit_code) - # get sample list again - logger.info("check new samples...") - sample_list_new = get_sample_info(connection) - new_samples = get_sample_dic(sample_list_new) - logger.info(new_samples) - # try import study - except exc.SQLAlchemyError as e: - logger.error(e.message) - - finally: - connection.close() \ No newline at end of file diff --git a/core/src/main/scripts/importer/metaImport.py b/core/src/main/scripts/importer/metaImport.py index 5ead265f68a..afdf5c0bcef 100755 --- a/core/src/main/scripts/importer/metaImport.py +++ b/core/src/main/scripts/importer/metaImport.py @@ -74,11 +74,11 @@ def interface(): parser.add_argument('-species', '--species', type=str, default='human', help='species information (default: assumed human)', required=False) - parser.add_argument('-genome', '--reference_genome', type=str, default='hg19', - help='reference genome build (default: assumed hg19)', + parser.add_argument('-ucsc', '--ucsc_build_name', type=str, default='hg19', + help='UCSC reference genome assembly name (default: assumed hg19)', required=False) - parser.add_argument('-build', '--genome_build', type=str, default='37', - help='reference genome build (default: assumed 37 for reference genome hg19)', + parser.add_argument('-ncbi', '--ncbi_build_number', type=str, default='37', + 
help='NCBI reference genome build number (default: assumed 37 for UCSC reference genome build hg19)', required=False) parser.add_argument('-jar', '--jar_path', type=str, required=False, help=( diff --git a/core/src/main/scripts/importer/validateData.py b/core/src/main/scripts/importer/validateData.py index 4b5d8f2cc9a..2d8af5fa5d8 100755 --- a/core/src/main/scripts/importer/validateData.py +++ b/core/src/main/scripts/importer/validateData.py @@ -288,11 +288,7 @@ def __init__(self, cancer_type_dict, hugo_entrez_map, alias_entrez_map, gene_set for entrez_list in list(entrez_map.values()): for entrez_id in entrez_list: self.entrez_set.add(entrez_id) - - # Set defaults for genome version and species - self.species = 'human' - self.ncbi_build = '37' - self.genome_build = 'hg19' + #Set defaults for genome version and species self.__species = 'human' self.__ncbi_build = '37' @@ -4327,29 +4323,29 @@ def validate_defined_caselists(cancer_study_id, case_list_ids, file_types, logge "'add_global_case_list: true' to the meta_study.txt file", cancer_study_id + '_all') - if 'meta_mutations_extended' in file_types: - if cancer_study_id + '_sequenced' not in case_list_ids: - logger.error( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for mutations. This " - "is required for calculation of samples with mutations in OncoPrint and Study Summary.", - cancer_study_id + '_sequenced') - - if 'meta_CNA' in file_types: - if cancer_study_id + '_cna' not in case_list_ids: - logger.error( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for mutations. 
This " - "is required for calculation of samples with CNA in OncoPrint and Study Summary.", - cancer_study_id + '_cna') - - if 'meta_mutations_extended' in file_types and 'meta_CNA' in file_types: - if cancer_study_id + '_cnaseq' not in case_list_ids: - logger.warning( - "No case list found with stable_id '%s', please add this " - "case list to specify which samples are profiled for this data type. On the query page, this " - "case list will be selected by default when both mutation and CNA data are available.", - cancer_study_id + '_cnaseq') + if 'meta_mutations_extended' in file_types: + if cancer_study_id + '_sequenced' not in case_list_ids: + logger.error( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for mutations. This " + "is required for calculation of samples with mutations in OncoPrint and Study Summary.", + cancer_study_id + '_sequenced') + + if 'meta_CNA' in file_types: + if cancer_study_id + '_cna' not in case_list_ids: + logger.error( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for mutations. This " + "is required for calculation of samples with CNA in OncoPrint and Study Summary.", + cancer_study_id + '_cna') + + if 'meta_mutations_extended' in file_types and 'meta_CNA' in file_types: + if cancer_study_id + '_cnaseq' not in case_list_ids: + logger.warning( + "No case list found with stable_id '%s', please add this " + "case list to specify which samples are profiled for this data type. 
On the query page, this " + "case list will be selected by default when both mutation and CNA data are available.", + cancer_study_id + '_cnaseq') def validateStudyTags(tags_file_path, logger): """Validate the study tags file.""" @@ -4630,11 +4626,11 @@ def interface(args=None): parser.add_argument('-species', '--species', type=str, default='human', help='species information (default: assumed human)', required=False) - parser.add_argument('-genome', '--reference_genome', type=str, default='hg19', - help='reference genome build (default: assumed hg19)', + parser.add_argument('-ucsc', '--ucsc_build_name', type=str, default='hg19', + help='UCSC reference genome assembly name(default: assumed hg19)', required=False) - parser.add_argument('-build', '--genome_build', type=str, default='37', - help='reference genome build (default: assumed 37 for reference genome hg19)', + parser.add_argument('-ncbi', '--ncbi_build_number', type=str, default='37', + help='NCBI reference genome build number (default: assumed 37 for UCSC reference genome build hg19)', required=False) parser.add_argument('-html', '--html_table', type=str, required=False, help='path to html report output file') @@ -4942,8 +4938,8 @@ def main_validate(args): # specify species and genomic information portal_instance.species = args.species - portal_instance.genome_build = args.reference_genome - portal_instance.ncbi_build = args.genome_build + portal_instance.genome_build = args.ucsc_build_name + portal_instance.ncbi_build = args.ncbi_build_number validate_study(study_dir, portal_instance, logger, relaxed_mode, strict_maf_checks) diff --git a/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java b/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java index 8a20dc5de01..a06b7db4753 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java +++ b/core/src/test/java/org/mskcc/cbio/portal/dao/TestDaoCancerStudy.java @@ -70,6 +70,7 @@ public void 
testDaoCancerStudy() throws DaoException, IOException { assertEquals("breast,breast invasive", DaoTypeOfCancer.getTypeOfCancerById("BRCA").getClinicalTrialKeywords()); CancerStudy cancerStudy = new CancerStudy("GBM", "GBM Description", "gbm", "brca", false); + cancerStudy.setReferenceGenome("hg19"); DaoCancerStudy.addCancerStudy(cancerStudy); // Removed testing that depends on internal ids @@ -87,6 +88,7 @@ public void testDaoCancerStudy() throws DaoException, IOException { assertEquals("Glioblastoma", cancerStudy.getDescription()); CancerStudy cancerStudy2 = new CancerStudy("Breast", "Breast Description", "breast", "brca", false); + cancerStudy2.setReferenceGenome("hg19"); DaoCancerStudy.addCancerStudy(cancerStudy2); // Removed testing that depends on internal ids diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCaisesClinicalXML.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCaisesClinicalXML.java index a5c12ce538f..8bf8fd5c5c6 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCaisesClinicalXML.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCaisesClinicalXML.java @@ -69,6 +69,7 @@ public void setUp() throws Exception { DaoTypeOfCancer.addTypeOfCancer(typeOfCancer); CancerStudy cancerStudy = new CancerStudy("prad","prad","prad","prad",true); + cancerStudy.setReferenceGenome("hg19"); DaoCancerStudy.addCancerStudy(cancerStudy); int studyId = DaoCancerStudy.getCancerStudyByStableId("prad").getInternalId(); diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportClinicalData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportClinicalData.java index 52174e564ba..044d475334d 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportClinicalData.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportClinicalData.java @@ -83,6 +83,7 @@ public void setUp() throws DaoException // new dummy study to simulate importing clinical 
data in empty study: cancerStudy = new CancerStudy("testnew","testnew","testnew","brca",true); + cancerStudy.setReferenceGenome("hg19"); DaoCancerStudy.addCancerStudy(cancerStudy); // implicit test: cancerStudy = DaoCancerStudy.getCancerStudyByStableId("testnew"); diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java index f00be487724..0b650c164a1 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java @@ -87,7 +87,8 @@ public void setUp() throws DaoException public void testImportSegmentDataNewStudy() throws Exception { //new dummy study to simulate importing clinical data in empty study: CancerStudy cancerStudy = new CancerStudy("testnewseg","testnewseg","testnewseg","brca",true); - DaoCancerStudy.addCancerStudy(cancerStudy); + cancerStudy.setReferenceGenome("hg19"); + DaoCancerStudy.addCancerStudy(cancerStudy); addTestPatientAndSampleRecords(new File("src/test/resources/segment/data_cna_hg19.seg"), cancerStudy); String[] args = { diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java index 374783e4570..44ec2b2248b 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportGeneData.java @@ -67,13 +67,9 @@ public class TestImportGeneData { public void testImportGeneData() throws Exception { DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); ProgressMonitor.setConsoleMode(false); - - /* those isoforms from MSKCC clinical bioinformatics pipeline need to be manually added - File file = new File("src/test/resources/supp-genes.txt"); - ImportGeneData.importSuppGeneData(file, 
ReferenceGenome.HOMO_SAPIENS_DEFAULT_GENOME_BUILD);*/ File file = new File("src/test/resources/genes_test.txt"); - ImportGeneData.importData(file); + ImportGeneData.importData(file, "GRCh37"); CanonicalGene gene = daoGene.getGene(10); assertEquals("NAT2", gene.getHugoGeneSymbolAllCaps()); diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java index d269ae7f61f..839c551b7f8 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportReferenceGenome.java @@ -1,35 +1,3 @@ -/* - * Copyright (c) 2015 Memorial Sloan-Kettering Cancer Center. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS - * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder - * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no - * obligations to provide maintenance, support, updates, enhancements or - * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be - * liable to any party for direct, indirect, special, incidental or - * consequential damages, including lost profits, arising out of the use of this - * software and its documentation, even if Memorial Sloan-Kettering Cancer - * Center has been advised of the possibility of such damage. - */ - -/* - * This file is part of cBioPortal. - * - * cBioPortal is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - package org.mskcc.cbio.portal.scripts; import org.junit.Test; @@ -62,18 +30,11 @@ public class TestImportReferenceGenome { * in genes_test.txt. The file genes_test.txt contains real data. */ public void testImportReferenceGenome() throws Exception { - ProgressMonitor.setConsoleMode(false); - File file = new File("src/test/resources/reference_genomes.txt"); - ImportReferenceGenome.importData(file); - ReferenceGenome genome = DaoReferenceGenome.getReferenceGenomeByInternalId(1); assertEquals("GRCh37", genome.getBuildName()); assertEquals(1, DaoReferenceGenome.getReferenceGenomeIdByName("GRCh37")); - - } - } \ No newline at end of file diff --git a/core/src/test/java/org/mskcc/cbio/portal/web_api/TestGetTypesOfCancer.java b/core/src/test/java/org/mskcc/cbio/portal/web_api/TestGetTypesOfCancer.java index 8bea433c41c..36f600e8dc4 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/web_api/TestGetTypesOfCancer.java +++ b/core/src/test/java/org/mskcc/cbio/portal/web_api/TestGetTypesOfCancer.java @@ -162,6 +162,7 @@ public void testGetCancerStudies() throws DaoException, IOException, ProtocolExc CancerStudy cancerStudy = new CancerStudy("Breast Invasive Carcinoma (TCGA, Nature 2012)", DESCRIPTION, "study_tcga_pub_testapi2", "brca_testapi2", true); + cancerStudy.setReferenceGenome("hg19"); DaoCancerStudy.addCancerStudy(cancerStudy, true); String output = GetTypesOfCancer.getCancerStudies(); diff --git a/core/src/test/resources/reference_genomes.txt b/core/src/test/resources/reference_genomes.txt index 5bdc713cc2d..91f2f96bfd2 100644 --- a/core/src/test/resources/reference_genomes.txt +++ b/core/src/test/resources/reference_genomes.txt @@ -1,4 +1,2 @@ #species name build_name nonN_bases URL release_date -#human hg19 GRCh37 2897310462 http://hgdownload.cse.ucsc.edu/goldenPath/hg19 
2009-02-01 00:00:00 -#human hg38 GRCh38 3049315783 http://hgdownload.cse.ucsc.edu/goldenPath/hg38 2013-12-24 00:00:00 mouse mm10 GRCm38 2652783500 http://hgdownload.cse.ucsc.edu/goldenPath/mm10 2011-12-01 00:00:00 \ No newline at end of file diff --git a/core/src/test/resources/seed_mini.sql b/core/src/test/resources/seed_mini.sql index 019cbdc6c8d..2ec17c0c385 100644 --- a/core/src/test/resources/seed_mini.sql +++ b/core/src/test/resources/seed_mini.sql @@ -101,7 +101,7 @@ INSERT INTO `reference_genome` VALUES (2, 'human', 'hg38', 'GRCh38', 3049315783, -- cancer_study INSERT INTO "cancer_study" ("CANCER_STUDY_ID", "CANCER_STUDY_IDENTIFIER", "TYPE_OF_CANCER_ID", "NAME", "SHORT_NAME", "DESCRIPTION", "PUBLIC", "PMID", "CITATION", "GROUPS","REFERENCE_GENOME_ID") -VALUES (1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897','TCGA, Nature 2012','SU2C-PI3K;PUBLIC;GDAC',1); +VALUES (1,'study_tcga_pub','brca','Breast Invasive Carcinoma (TCGA, Nature 2012)','BRCA (TCGA)','The Cancer Genome Atlas (TCGA) Breast Invasive Carcinoma project. 825 cases.
Nature 2012. Raw data via the TCGA Data Portal.',1,'23000897,26451490','TCGA, Nature 2012, ...','SU2C-PI3K;PUBLIC;GDAC',1); -- gene as genetic_entity INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); diff --git a/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql b/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql deleted file mode 100644 index a860fbaa134..00000000000 --- a/db-scripts/src/main/resources/adjust_col_size_to_utf8.sql +++ /dev/null @@ -1,16 +0,0 @@ -DROP PROCEDURE IF EXISTS adjust_col_size_to_utf8; -DELIMITER $$ -CREATE PROCEDURE adjust_col_size_to_utf8() BEGIN - IF ((SELECT MAX(LENGTH(TUMOR_SEQ_ALLELE)) FROM mutation_event) < 256 - AND (SELECT MAX(LENGTH(REFERENCE_ALLELE)) FROM mutation_event) < 256) - THEN - ALTER TABLE mutation_event - MODIFY REFERENCE_ALLELE varchar(255), - MODIFY TUMOR_SEQ_ALLELE varchar(255), - MODIFY MUTATION_TYPE varchar(64), - MODIFY LINK_XVAR varchar(255), - MODIFY LINK_PDB varchar(255), - MODIFY LINK_MSA varchar(255); - END IF; -END$$ -DELIMITER ; \ No newline at end of file diff --git a/db-scripts/src/main/resources/cgds.sql b/db-scripts/src/main/resources/cgds.sql index 8b93c29d8b5..a31a315b411 100644 --- a/db-scripts/src/main/resources/cgds.sql +++ b/db-scripts/src/main/resources/cgds.sql @@ -123,8 +123,7 @@ CREATE TABLE `reference_genome` ( `URL` varchar(256) NOT NULL, `RELEASE_DATE` datetime DEFAULT NULL, PRIMARY KEY (`REFERENCE_GENOME_ID`), - UNIQUE INDEX `BUILD_NAME_UNIQUE` (`BUILD_NAME` ASC), - CHECK(`SPECIES` = 'human') + UNIQUE INDEX `BUILD_NAME_UNIQUE` (`BUILD_NAME` ASC) ); -- -------------------------------------------------------- diff --git a/db-scripts/src/main/resources/migration.sql b/db-scripts/src/main/resources/migration.sql index ac2fac60c5c..06bdbe1ff5e 100644 --- a/db-scripts/src/main/resources/migration.sql +++ b/db-scripts/src/main/resources/migration.sql @@ -690,3 +690,28 @@ UPDATE `info` SET `DB_SCHEMA_VERSION`="2.10.0"; ALTER TABLE `copy_number_seg` MODIFY COLUMN 
`SEG_ID` BIGINT(20); UPDATE `info` SET `DB_SCHEMA_VERSION`="2.10.1"; + +##version: 2.11.0 +UPDATE `reference_genome` SET `GENOME_SIZE` = 2897310462 WHERE `NAME`='hg19'; +UPDATE `reference_genome` SET `GENOME_SIZE` = 3049315783 WHERE `NAME`='hg38'; +UPDATE `reference_genome` SET `GENOME_SIZE` = 2652783500 WHERE `NAME`='mm10'; +ALTER TABLE `reference_genome_gene` MODIFY COLUMN `CHR` varchar(5); +INSERT INTO reference_genome_gene (ENTREZ_GENE_ID, CYTOBAND, EXONIC_LENGTH, CHR, REFERENCE_GENOME_ID) +SELECT + ENTREZ_GENE_ID, + CYTOBAND, + LENGTH, + SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(gene.CYTOBAND,IF(LOCATE('p', gene.CYTOBAND), 'p', 'q'), 1),'q',1),'cen',1), + 1 +FROM `gene` +WHERE NOT EXISTS (SELECT * FROM reference_genome_gene); +ALTER TABLE `gene` DROP COLUMN `CYTOBAND`, DROP COLUMN `LENGTH`; +ALTER TABLE `cancer_study` ADD COLUMN `REFERENCE_GENOME_ID` INT(4) DEFAULT 1, + ADD CONSTRAINT `FK_REFERENCE_GENOME` FOREIGN KEY (`REFERENCE_GENOME_ID`) + REFERENCES `reference_genome`(`REFERENCE_GENOME_ID`) ON DELETE CASCADE; +UPDATE `cancer_study` + INNER JOIN `genetic_profile` ON `cancer_study`.CANCER_STUDY_ID = `genetic_profile`.CANCER_STUDY_ID + INNER JOIN `mutation` ON `mutation`.GENETIC_PROFILE_ID = `genetic_profile`.GENETIC_PROFILE_ID + INNER JOIN `mutation_event` ON `mutation`.MUTATION_EVENT_ID = `mutation_event`.MUTATION_EVENT_ID +SET `cancer_study`.REFERENCE_GENOME_ID = IF(`mutation_event`.NCBI_BUILD in ('37', 'hg19','GRCh37'), 1, 2); +UPDATE `info` SET `DB_SCHEMA_VERSION`="2.11.0"; diff --git a/docs/Using-the-dataset-validator.md b/docs/Using-the-dataset-validator.md index 830927fd053..14e6f4ee250 100644 --- a/docs/Using-the-dataset-validator.md +++ b/docs/Using-the-dataset-validator.md @@ -40,10 +40,10 @@ optional arguments: installation -species SPECIES, --species SPECIES species information (default: assumed human) - -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME - reference genome build (default: assumed hg19) - -build GENOME_BUILD, 
--genome_build GENOME_BUILD - reference genome build (default: assumed 37 for reference genome hg19) + -ucsc UCSC_BUILD_NAME, --ucsc_build_name UCSC_BUILD_NAME + UCSC reference genome assembly name (default: assumed hg19) + -ncbi NCBI_BUILD_NUMBER, --ncbi_build_number NCBI_BUILD_NUMBER + NCBI reference genome build number (default: assumed 37 for UCSC reference genome build hg19) -html HTML_TABLE, --html_table HTML_TABLE path to html report output file -e ERROR_FILE, --error_file ERROR_FILE @@ -429,18 +429,18 @@ Validation of study succeeded with warnings. ## Validation of non-human data ## When importing a study, the validator assumes by default that the following three parameters -`--species` , `--reference_genome` , `--genome_build` are set to the following: +`--species` , `--ucsc_build_name` , `--ncbi_build_number` are set to the following: ``` ---species human ---reference_genome hg19 ---genome_build 37 +--species=human +--ncbi_build_number=37 +--ucsc_build_name=hg19 ``` cBioPortal is gradually introducing support for mouse. 
If you want to load mouse studies and you have [set up your database for mouse](Import-the-Seed-Database.md#download-the-cbioportal-database), you should set the previous parameters to: ``` ---species mouse ---reference_genome mm10 ---genome_build 38 +--species=mouse +--ncbi_build_number=38 +--ucsc_build_name=mm10 ``` As an example, the command for the mouse example using the three parameters is given: @@ -483,10 +483,10 @@ optional arguments: installation -species SPECIES, --species SPECIES species information (default: assumed human) - -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME - reference genome build (default: assumed hg19) - -build GENOME_BUILD, --genome_build GENOME_BUILD - reference genome build (default: assumed 37 for reference genome hg19) + -ucsc UCSC_BUILD_NAME, --ucsc_build_name UCSC_BUILD_NAME + UCSC reference genome assembly name (default: assumed hg19) + -ncbi NCBI_BUILD_NUMBER, --ncbi_build_number NCBI_BUILD_NUMBER + NCBI reference genome build number (default: assumed 37 for UCSC genome build hg19) -m, --strict_maf_checks Option to enable strict mode for validator when validating mutation data diff --git a/docs/Using-the-metaImport-script.md b/docs/Using-the-metaImport-script.md index f799850bed2..7672c3d308a 100644 --- a/docs/Using-the-metaImport-script.md +++ b/docs/Using-the-metaImport-script.md @@ -10,7 +10,7 @@ and then run the following command: ``` This will tell you the parameters you can use: ``` - $./metaImport.py -h +$./metaImport.py -h usage: metaImport.py [-h] -s STUDY_DIRECTORY [-u URL_SERVER | -p PORTAL_INFO_DIR | -n] [-species SPECIES] [-genome REFERENCE_GENOME] @@ -34,10 +34,10 @@ optional arguments: installation -species SPECIES, --species SPECIES species information (default: assumed human) - -genome REFERENCE_GENOME, --reference_genome REFERENCE_GENOME - reference genome build (default: assumed hg19) - -build GENOME_BUILD, --genome_build GENOME_BUILD - reference genome build (default: assumed 37) + -ucsc 
UCSC_BUILD_NAME, --ucsc_build_name UCSC_BUILD_NAME + UCSC reference genome assembly name (default: assumed hg19) + -ncbi NCBI_BUILD_NUMBER, --ncbi_build_number NCBI_BUILD_NUMBER + NCBI reference genome build number (default: assumed 37 for UCSC genome build hg19) -jar JAR_PATH, --jar_path JAR_PATH Path to scripts JAR file (default: locate it relative to the import script) diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java index ecf47ee4b7d..e648a26b734 100644 --- a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/ReferenceGenomeGeneRepository.java @@ -1,34 +1,3 @@ -/* - * Copyright (c) 2016 Memorial Sloan Kettering Cancer Center. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS - * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder - * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no - * obligations to provide maintenance, support, updates, enhancements or - * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be - * liable to any party for direct, indirect, special, incidental or - * consequential damages, including lost profits, arising out of the use of this - * software and its documentation, even if Memorial Sloan-Kettering Cancer - * Center has been advised of the possibility of such damage. - */ - -/* - * This file is part of cBioPortal. - * - * cBioPortal is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ package org.cbioportal.persistence; import org.cbioportal.model.ReferenceGenomeGene; diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java index 01d04fe9be4..dc4091a2ec7 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMyBatisRepository.java @@ -1,6 +1,5 @@ package org.cbioportal.persistence.mybatis; - import java.util.List; import org.cbioportal.model.ReferenceGenomeGene; diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml index d13293cacd1..9c9e8252024 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/ReferenceGenomeGeneMapper.xml @@ -88,7 +88,6 @@ reference_genome.NAME = #{genomeName} AND gene.ENTREZ_GENE_ID = #{geneId}
-