Skip to content

Commit

Permalink
Import of BTMCINNES/UMLS-Interface-1.33 from CPAN.
Browse files Browse the repository at this point in the history
gitpan-cpan-distribution: UMLS-Interface
gitpan-cpan-version:      1.33
gitpan-cpan-path:         BTMCINNES/UMLS-Interface-1.33.tar.gz
gitpan-cpan-author:       BTMCINNES
gitpan-cpan-maturity:     released
  • Loading branch information
Bridget McInnes authored and Gitpan committed Oct 23, 2014
1 parent 65bffec commit 987780b
Show file tree
Hide file tree
Showing 11 changed files with 592 additions and 28 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
@@ -1,6 +1,11 @@
NAME
UMLS::Interface CHANGES

Changes from version 1.31 to 1.33
1. Updated documentation

2. Added getIntrinsicIC function and corresponding util/ program

Changes from version 1.29 to 1.31
1. modified ICFinder.pm to return an error if the icfrequency
information does not match the configuration file -- similar to what we
Expand Down
31 changes: 19 additions & 12 deletions INSTALL
Expand Up @@ -256,9 +256,10 @@ Stage 3 - Install MySQL, if already installed go to Stage 4
<http://dev.mysql.com/doc/refman/5.1/en/adding-users.html>

Stage 4 - Install UMLS, if already installed go to Stage 5

VERSION
The UMLS-Interface requires version UMLS 2008AA or higher
The UMLS-Interface requires version UMLS 2008AA or higher.

Directions modified for installation version 2012AA.

REMINDER
If you already have the UMLS installed, make certain that you have the
Expand Down Expand Up @@ -314,16 +315,16 @@ Stage 4 - Install UMLS, if already installed go to Stage 5
3. <current release>-2-meta.nlm
4. mmsys.zip
5. <current release>.CHK
6. Copyright_Notice.txt
7. README.txt
6. Copyright_Notice.txt
7. README.txt

Step 2: Unzip the mmsys.zip file (unzip mmsys.zip)
The following should be created: 1. linux_mmsys.sh 2. solaris_mmsys.sh
3. macintosh_mmsys.sh 4. windows_mmsys.bat 5. MMSYS/ (directory)

Step 3: Run MetamorphoSys (the install wizard)
-- Run the appropriate .sh file for your system
--> I ran ./linux_mmsys.sh
--> I ran ./run_linux.sh

-- Click 'Install UMLS'

Expand All @@ -334,6 +335,8 @@ Stage 4 - Install UMLS, if already installed go to Stage 5
box is checked. Although, I installed all three of the
Knowledge Sources: 'Metathesaurus', 'Semantic
Network' and 'SPECIALIST Lexicon and Lexical Tools'.
- Choose the mysql database for the Semantic Network load
scripts
- Click 'OK'

-- 'Install UMLS' and 'MetamorphoSys Configuration' windows will appear
Expand Down Expand Up @@ -367,7 +370,8 @@ Stage 5 - Load UMLS into MySQL, if already installed go to Stage 6
Step 1: Create the MySQL database
Log into MySQL and create a database called 'umls' as follows:

CREATE DATABASE IF NOT EXISTS umls CHARACTER SET utf8 COLLATE utf8_unicode_ci;
CREATE DATABASE IF NOT EXISTS umls CHARACTER SET utf8 COLLATE
utf8_unicode_ci;

Step 2: Modify the 'my.cnf' file.
This has been put in a different place every version or distribution
Expand All @@ -385,12 +389,15 @@ Stage 5 - Load UMLS into MySQL, if already installed go to Stage 6
performance, the MySQL 5 server requires changing buffer sizes to make
use of the memory available.

key_buffer = 300M
table_cache = 300
sort_buffer_size = 20M
read_buffer_size = 20M
query_cache_limit = 3M
query_cache_size = 100M
key_buffer = 600M
table_cache = 300
sort_buffer_size = 20M
read_buffer_size = 200M
query_cache_limit = 3M
query_cache_size = 100M
myisam_sort_buffer = 200M
bulk_insert_buffer_size = 100M
join_buffer_size = 100M

Also make certain that this file is readable in order to run this as
non-root. To change permission on it: chmod gou+r /etc/my.cnf
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -177,6 +177,7 @@ utils/getCuiDef.pl
utils/getCuiList.pl
utils/getExtendedDef.pl
utils/getIC.pl
utils/getIntrinsicIC.pl
utils/getParents.pl
utils/getRelated.pl
utils/getRelations.pl
Expand Down
2 changes: 1 addition & 1 deletion META.yml
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: UMLS-Interface
version: 1.31
version: 1.33
abstract: Perl interface to the Unified Medical Language System (UMLS)
author:
- Bridget McInnes <bthomson@cs.umn.edu>, Ted Pedersen <tpederse@d.umn.edu>, Serguei Pakhomov <pakh0002@umn.edu>, Siddharth Patwardhan <sidd@cs.utah.edu>
Expand Down
2 changes: 1 addition & 1 deletion Makefile.PL
Expand Up @@ -24,7 +24,7 @@ WriteMakefile(
'Digest::SHA1' => '2.12',
'File::Spec' => '3.31',
'File::Path' => '2.08'},
'EXE_FILES' => [("utils/removeConfigData.pl", "utils/findLeastCommonSubsumer.pl", "utils/findPathToRoot.pl", "utils/findShortestPath.pl", "utils/getChildren.pl", "utils/getParents.pl", "utils/getCuiDef.pl", "utils/getExtendedDef.pl", "utils/getRelated.pl", "utils/getRelations.pl", "utils/getStDef.pl", "utils/getAssociatedTerms.pl", "utils/getAssociatedCuis.pl", "utils/findCuiDepth.pl", "utils/getSts.pl","utils/getSemanticGroup.pl", "utils/getSemanticRelation.pl", "utils/findDFS.pl", "utils/getTableNames.pl", "utils/getCuiList.pl", "utils/getIC.pl", "utils/getStString.pl", "utils/findShortestPathLength.pl", "utils/getCompounds.pl", "utils/findNumberOfCloserConcepts.pl")],
'EXE_FILES' => [("utils/removeConfigData.pl", "utils/findLeastCommonSubsumer.pl", "utils/findPathToRoot.pl", "utils/findShortestPath.pl", "utils/getChildren.pl", "utils/getParents.pl", "utils/getCuiDef.pl", "utils/getExtendedDef.pl", "utils/getRelated.pl", "utils/getRelations.pl", "utils/getStDef.pl", "utils/getAssociatedTerms.pl", "utils/getAssociatedCuis.pl", "utils/findCuiDepth.pl", "utils/getSts.pl","utils/getSemanticGroup.pl", "utils/getSemanticRelation.pl", "utils/findDFS.pl", "utils/getTableNames.pl", "utils/getCuiList.pl", "utils/getIC.pl", "utils/getIntrinsicIC.pl", "utils/getStString.pl", "utils/findShortestPathLength.pl", "utils/getCompounds.pl", "utils/findNumberOfCloserConcepts.pl")],
'dist' => {'COMPRESS' => 'gzip -9f', 'SUFFIX' => 'gz'},
($] >= 5.005 ?
('ABSTRACT_FROM' => 'lib/UMLS/Interface.pm', # retrieve abstract from module
Expand Down
69 changes: 67 additions & 2 deletions lib/UMLS/Interface.pm
@@ -1,5 +1,5 @@
# UMLS::Interface
# (Last Updated $Id: Interface.pm,v 1.132 2012/06/23 20:51:41 btmcinnes Exp $)
# (Last Updated $Id: Interface.pm,v 1.135 2013/04/08 11:35:23 btmcinnes Exp $)
#
# Perl module that provides a perl interface to the
# Unified Medical Language System (UMLS)
Expand Down Expand Up @@ -421,7 +421,7 @@ my $pkg = "UMLS::Interface";

use vars qw($VERSION);

$VERSION = '1.31';
$VERSION = '1.33';

my $debug = 0;

Expand Down Expand Up @@ -1973,6 +1973,71 @@ sub getIC {
return $ic;
}

=head3 getSecoIntrinsicIC
description:
returns the intrinsic information content of a given cui using
the formula proposed by Seco, Veale and Hayes 2004
input:
$concept <- string containing a cui
output:
$double <- double containing its IC
example:
use UMLS::Interface;
my $umls = UMLS::Interface->new();
my $concept = "C0018563";
my $double = $umls->getSecoIntrinsicIC($concept);
print "The Intrinsic IC of $concept is $double\n";
=cut
sub getSecoIntrinsicIC {
    my ($self, $concept) = @_;

    # the computation itself lives in the IC finder; this method only
    # forwards the cui and hands the resulting value back
    my $intrinsic_ic = $icfinder->_getSecoIntrinsicIC($concept);

    return $intrinsic_ic;
}
=head3 getSanchezIntrinsicIC
description:
returns the intrinsic information content of a given cui using
the formula proposed by Sanchez and Batet 2011
input:
$concept <- string containing a cui
output:
$double <- double containing its IC
example:
use UMLS::Interface;
my $umls = UMLS::Interface->new();
my $concept = "C0018563";
my $double = $umls->getSanchezIntrinsicIC($concept);
print "The Intrinsic IC of $concept is $double\n";
=cut
sub getSanchezIntrinsicIC {
    my ($self, $concept) = @_;

    # the computation itself lives in the IC finder; this method only
    # forwards the cui and hands the resulting value back
    my $intrinsic_ic = $icfinder->_getSanchezIntrinsicIC($concept);

    return $intrinsic_ic;
}

=head3 getProbability
description:
Expand Down
2 changes: 1 addition & 1 deletion lib/UMLS/Interface/CuiFinder.pm
@@ -1,5 +1,5 @@
# UMLS::Interface::CuiFinder
# (Last Updated $Id: CuiFinder.pm,v 1.76 2011/07/28 18:25:45 btmcinnes Exp $)
# (Last Updated $Id: CuiFinder.pm,v 1.77 2012/06/24 11:16:07 btmcinnes Exp $)
#
# Perl module that provides a perl interface to the
# Unified Medical Language System (UMLS)
Expand Down
159 changes: 157 additions & 2 deletions lib/UMLS/Interface/ICFinder.pm
@@ -1,5 +1,5 @@
# UMLS::Interface::ICFinder
# (Last Updated $Id: ICFinder.pm,v 1.26 2012/06/23 20:51:41 btmcinnes Exp $)
# (Last Updated $Id: ICFinder.pm,v 1.31 2013/04/17 13:44:57 btmcinnes Exp $)
#
# Perl module that provides a perl interface to the
# Unified Medical Language System (UMLS)
Expand Down Expand Up @@ -70,6 +70,10 @@ my $configN = 0;
my $errorhandler = "";
my $cuifinder = "";

#  state shared by the intrinsic IC routines

#  cuis that _getSubsumers found to have no children (keys are cuis)
my %leafs = ();
#  cuis that _getSubsumers has visited (keys are cuis)
my %subsumers = ();
#  number of leaves found by _getMaxLeaves for the traversal started at
#  $cuifinder->_root(); 0 means it has not been computed yet
my $max_leaves = 0;

# UMLS-specific stuff ends ----------

# -------------------- Class methods start here --------------------
Expand Down Expand Up @@ -223,10 +227,161 @@ sub _getN
my $function = "_getN";
&_debug($function);

if($configN == 0) {
my $hash = $cuifinder->_getCuiList();
$configN = keys %{$hash};
}
return $configN;
}


#  returns the intrinsic information content (IC) of a cui computed
#  from the number of its children:
#
#      IC = 1 - log10(children + 1) / log10(N)
#
#  where N is the value returned by _getN
#  input : $concept <- string containing a cui
#  output: $double  <- double containing its intrinsic IC
sub _getSecoIntrinsicIC
{
    my $self    = shift;
    my $concept = shift;

    #  errors and debug output are reported under this routine's own name
    my $function = "_getSecoIntrinsicIC";
    &_debug($function);

    #  check self
    if(!defined $self || !ref $self) {
        $errorhandler->_error($pkg, $function, "", 2);
    }

    #  check concept was obtained
    if(!$concept) {
        $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
    }

    #  check if valid concept
    if(! ($errorhandler->_validCui($concept)) ) {
        $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
    }

    #  get the children of the concept and the size used to normalise
    my $children = $cuifinder->_getChildren($concept);
    my $n        = _getN();

    #  $#{$children} is the last index of the list, so adding two gives
    #  (number of children + 1); the +1 keeps the logarithm defined for
    #  a concept without children
    my $children_num = ($#{$children}) + 2;

    my $ic = 1 - ( (log($children_num)/log(10)) / (log($n)/log(10)) );

    return $ic;
}

#########################################################################
#  Depth First Search (DFS)
#########################################################################
#  walks the children of a concept depth first, recording every concept
#  it reaches in %subsumers and every concept without children in %leafs
#  input : $concept <- string containing a cui
#          $array   <- reference to an array of the cuis on the path
#                      that led to $concept (empty for the first call)
#  output: none; %subsumers and %leafs are updated as a side effect
sub _getSubsumers
{
    my $concept = shift;
    my $array   = shift;

    if($concept=~/^\s*$/) { return; }

    #  if concept is one of the following just return
    #   C1274012|Ambiguous concept (inactive concept)
    #   C1274013|Duplicate concept (inactive concept)
    #   C1276325|Reason not stated concept (inactive concept)
    #   C1274014|Outdated concept (inactive concept)
    #   C1274015|Erroneous concept (inactive concept)
    #   C1274021|Moved elsewhere (inactive concept)
    #   C2733115|limited status concept
    if($concept=~/C1274012|C1274013|C1276325|C1274014|C1274015|C1274021|C2733115/) {
        return;
    }

    #  set the new path
    my @path = (@{$array}, $concept);

    #  get all the children
    my $children = $cuifinder->_getChildren($concept);

    #  record the concept, and record it as a leaf if it has no children
    $subsumers{$concept}++;
    if($#{$children} < 0) {
        $leafs{$concept}++;
    }

    #  search through the children
    foreach my $child (@{$children}) {

        #  a child that is already recorded is either on the current
        #  path or has had everything below it explored, so walking it
        #  again cannot add a key to %subsumers or %leafs; skipping it
        #  keeps the walk from repeating shared subtrees once per path.
        #  the callers only count the keys of the two hashes
        next if exists $subsumers{$child};

        #  never step back onto the path; this still covers any cuis
        #  the caller supplied in $array
        next if grep { $_ eq $child } @path;

        #  otherwise continue on with the depth first search
        _getSubsumers($child, \@path);
    }

    return;
}

#  returns the intrinsic information content (IC) of a cui computed
#  from the concepts recorded by _getSubsumers:
#
#      IC = -log10( (leaves/subsumers + 1) / (max_leaves + 1) )
#
#  where leaves and subsumers are the number of keys in %leafs and
#  %subsumers after the traversal, and max_leaves comes from
#  _getMaxLeaves
#  input : $concept <- string containing a cui
#  output: $double  <- double containing its intrinsic IC
#
#  NOTE(review): Sanchez and Batet describe subsumers as the ancestors
#  of the concept, while the count used here comes from the downward
#  traversal in _getSubsumers -- confirm this is the intended reading
sub _getSanchezIntrinsicIC
{
    my $self    = shift;
    my $concept = shift;

    #  errors and debug output are reported under this routine's own name
    my $function = "_getSanchezIntrinsicIC";
    &_debug($function);

    #  check self
    if(!defined $self || !ref $self) {
        $errorhandler->_error($pkg, $function, "", 2);
    }

    #  check concept was obtained
    if(!$concept) {
        $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
    }

    #  check if valid concept
    if(! ($errorhandler->_validCui($concept)) ) {
        $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
    }

    #  start from a clean slate and traverse from the concept
    %leafs = (); %subsumers = (); my @path = ();
    _getSubsumers($concept, \@path);

    #  take the counts now: _getMaxLeaves clears both hashes when it
    #  has to compute its value
    my $leaves   = keys %leafs;
    my $subsumes = keys %subsumers;

    #  _getSubsumers records nothing for the cuis it skips, which would
    #  make the division below illegal; use a ratio of zero in that case
    my $ratio = $subsumes > 0 ? $leaves/$subsumes : 0;

    #  plain names are used instead of $a and $b, which are reserved
    #  for sort
    my $numerator   = $ratio + 1;
    my $denominator = _getMaxLeaves() + 1;

    my $ic = -1 * ( (log( $numerator/$denominator )/log(10)) );

    return $ic;
}

#  returns the number of leaves recorded by a traversal that starts at
#  $cuifinder->_root(); the value is kept in $max_leaves so the
#  traversal only runs while that value is still zero
#  input : none
#  output: $max_leaves <- number of keys in %leafs after the traversal
sub _getMaxLeaves {

    #  a non-zero value means the count is already known
    return $max_leaves unless $max_leaves == 0;

    #  clear the shared hashes, then traverse from the root
    %leafs     = ();
    %subsumers = ();
    my @trail  = ();
    _getSubsumers($cuifinder->_root(), \@trail);

    $max_leaves = keys %leafs;

    #  leave the shared hashes empty for the next traversal
    %leafs     = ();
    %subsumers = ();

    return $max_leaves;
}

# returns the information content (IC) of a cui
# input : $concept <- string containing a cui
# output: $double <- double containing its IC
Expand All @@ -252,7 +407,7 @@ sub _getIC
if(! ($errorhandler->_validCui($concept)) ) {
$errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
}

# if option frequency then the propagation hash
# hash has not been loaded and we should determine
# the information content of the concept using the
Expand Down

0 comments on commit 987780b

Please sign in to comment.