Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(2/7) RFC79: Make study_es_0_inc data pass validation #44

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,36 @@ Build docker image with:
docker build -t cbioportal-core .
```

Example of how to start loading of the whole study:
### Example of how to load the `study_es_0` study

Import gene panels

```bash
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenePanel.pl --data /data/study_es_0/data_gene_panel_testpanel1.txt
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenePanel.pl --data /data/study_es_0/data_gene_panel_testpanel2.txt
```

Import gene sets and supplementary data

```bash
docker run -it -v $(pwd)/src/test/resources/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenesetData.pl --data /data/genesets/study_es_0_genesets.gmt --new-version msigdb_7.5.1 --supp /data/genesets/study_es_0_supp-genesets.txt
```

Import gene set hierarchy data

```bash
docker run -it -v $(pwd)/src/test/resources/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenesetHierarchy.pl --data /data/genesets/study_es_0_tree.yaml
```

Import study

```bash
docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -s /data/study_es_0 -p /data/api_json -o
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
python importer/metaImport.py -s /data/study_es_0 -p /data/api_json_system_tests -o
```

### Incremental upload of data
Expand Down
23 changes: 17 additions & 6 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,24 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import java.text.*;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CancerStudyTags;
import org.mskcc.cbio.portal.model.ReferenceGenome;
import org.mskcc.cbio.portal.model.TypeOfCancer;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
* Analogous to and replaces the old DaoCancerType. A CancerStudy has a NAME and
Expand All @@ -61,7 +73,6 @@ public static enum Status {
private static final Map<Integer,CancerStudy> byInternalId = new HashMap<Integer,CancerStudy>();

static {
SpringUtil.initDataSource();
reCacheAll();
}

Expand Down
16 changes: 11 additions & 5 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import java.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.SpringUtil;
import org.mskcc.cbio.portal.model.GeneticAlterationType;
import org.mskcc.cbio.portal.model.GeneticProfile;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Analogous to and replaces the old DaoCancerType. A CancerStudy has a NAME and
Expand All @@ -52,7 +59,6 @@ private DaoGeneticProfile() {}
private static final Map<Integer,List<GeneticProfile>> byStudy = new HashMap<Integer,List<GeneticProfile>>();

static {
SpringUtil.initDataSource();
reCache();
}

Expand Down
13 changes: 8 additions & 5 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.SpringUtil;
import org.mskcc.cbio.portal.model.ReferenceGenome;

import java.util.*;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;


/**
Expand All @@ -36,7 +40,6 @@ public final class DaoReferenceGenome {
private static final Map<String, Integer> genomeInternalIds = new HashMap<String,Integer>();

static {
SpringUtil.initDataSource();
reCache();
}

Expand Down
9 changes: 3 additions & 6 deletions src/main/java/org/mskcc/cbio/portal/scripts/AddCaseList.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@

package org.mskcc.cbio.portal.scripts;

import java.util.ArrayList;
import java.util.List;

import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.DaoSampleList;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.model.SampleList;
import org.mskcc.cbio.portal.model.SampleListCategory;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.SpringUtil;

import java.util.ArrayList;
import java.util.List;

/**
* Command-line tool to add new case lists by generating them based on some rules.
Expand Down Expand Up @@ -131,7 +129,6 @@ public void run() {
throw new UsageException(progName, null, argSpec,
"cancer_study_identifier is not specified.");
}
SpringUtil.initDataSource();
CancerStudy theCancerStudy = DaoCancerStudy.getCancerStudyByStableId(cancerStudyIdentifier);
if (theCancerStudy == null) {
throw new IllegalArgumentException("cancer study identified by cancer_study_identifier '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CancerStudyTags;
import org.mskcc.cbio.portal.util.CancerStudyReader;
import org.mskcc.cbio.portal.util.CancerStudyTagsReader;
import org.mskcc.cbio.portal.util.ProgressMonitor;

import java.io.File;

Expand All @@ -53,7 +56,6 @@ public void run() {
}

File file = new File(args[0]);
SpringUtil.initDataSource();
CancerStudy cancerStudy = CancerStudyReader.loadCancerStudy(file);
CancerStudyTags cancerStudyTags = CancerStudyTagsReader.loadCancerStudyTags(file, cancerStudy);
String message = "Loaded the following cancer study:" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,40 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.*;
import joptsimple.OptionException;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.apache.commons.collections4.map.MultiKeyMap;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoClinicalAttributeMeta;
import org.mskcc.cbio.portal.dao.DaoClinicalData;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoPatient;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.ClinicalAttribute;
import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.StableIdUtil;
import org.mskcc.cbio.portal.util.SurvivalAttributeUtil;
import org.mskcc.cbio.portal.util.SurvivalAttributeUtil.SurvivalStatusAttributes;

import java.io.*;
import joptsimple.*;
import java.util.*;
import java.util.regex.*;
import org.apache.commons.collections4.map.MultiKeyMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;

public class ImportClinicalData extends ConsoleRunnable {

Expand Down Expand Up @@ -677,7 +701,6 @@ public void run() {
overwriteExisting = true;

}
SpringUtil.initDataSource();
CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(cancerStudyStableId);
if (cancerStudy == null) {
throw new IllegalArgumentException("Unknown cancer study: " + cancerStudyStableId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,30 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;

import joptsimple.*;

import java.io.*;
import joptsimple.OptionSet;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegment;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CopyNumberSegment;
import org.mskcc.cbio.portal.model.CopyNumberSegmentFile;
import org.mskcc.cbio.portal.model.ReferenceGenome;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.StableIdUtil;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.*;
import java.util.Properties;

/**
* Import Segment data into database.
Expand Down Expand Up @@ -118,7 +133,6 @@ public void run() {

ProgressMonitor.setCurrentMessage("Reading data from: " + dataFile);

SpringUtil.initDataSource();
CancerStudy cancerStudy = getCancerStudy(properties);

if (segmentDataExistsForCancerStudy(cancerStudy)) {
Expand Down
23 changes: 16 additions & 7 deletions src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,26 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.dao.DaoCosmicData;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.model.CosmicMutationFrequency;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.MutationKeywordUtils;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.springframework.util.Assert;

import java.io.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;

public class ImportCosmicData {
Expand Down Expand Up @@ -180,7 +190,6 @@ public static void main(String[] args) throws Exception {
System.out.println("command line usage: importCosmicData.pl <CosmicCodingMuts.vcf>");
return;
}
SpringUtil.initDataSource();
DaoCosmicData.deleteAllRecords();
ProgressMonitor.setConsoleMode(true);

Expand Down
13 changes: 9 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/scripts/ImportDrugs.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;

import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
* Command-line tool to import background drug information.
Expand Down Expand Up @@ -79,7 +85,6 @@ public static void main(String[] args) throws Exception {
return;
}
ProgressMonitor.setConsoleMode(true);
SpringUtil.initDataSource();

File file = new File(args[0]);
System.out.println("Reading drug data from: " + file.getAbsolutePath());
Expand Down
Loading
Loading