Skip to content

Commit

Permalink
Merge pull request #116 from ericleasemorgan/solr-cloud
Browse files Browse the repository at this point in the history
Adding SolrCloud scripts and configuration files
  • Loading branch information
ralphlevan authored Jun 24, 2020
2 parents 201f730 + 62c60e8 commit 7dce185
Show file tree
Hide file tree
Showing 11 changed files with 378 additions and 0 deletions.
39 changes: 39 additions & 0 deletions bin/createNewCollections.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
#
# createNewCollections.sh
#
# Uploads the "cord" configset from this host's
# /export/solr/node<hostnumber>/configsets/cord/conf directory to ZooKeeper.
# Only the host whose number is 1 performs the upload; every other host does
# nothing.
#
# <hostnumber> is the two characters at offset 5 of the hostname and must be
# a decimal number.
set -e
set -x

# Print a message on stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the one directory directly under the current directory whose name
# matches glob $1. Fails when nothing matches or when several entries match:
# an empty value would make a later `cd` jump to $HOME, and a multi-line
# value is not a usable path.
find_single_dir() {
  local matches
  matches=$(find . -maxdepth 1 -type d -name "$1") || return 1
  if [[ -z $matches || $matches == *$'\n'* ]]; then
    echo "expected exactly one '$1' directory in $PWD, found: '$matches'" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

hostname=$(hostname)
echo "$hostname"
hostnumber=${hostname:5:2}
hostlist="solr-master:2181,solr-worker:2181"
echo "hostnumber=$hostnumber"

# The comparison below needs an integer. Abort with a clear message instead
# of letting the test fail with a syntax error and silently skip the upload.
[[ $hostnumber =~ ^[0-9]+$ ]] ||
  die "cannot derive a numeric host number from hostname '$hostname'"

#only do this on one server. The zookeepers will pass it around
# 10# forces base 10 so a zero-padded value such as "08" is not read as octal.
if (( 10#$hostnumber == 1 )); then
  export JAVA_HOME=/export/java
  echo "Creating Collections!"
  cd /home/ralphlevan/
  solrDirectory=$(find_single_dir "solr*")
  echo "solrDirectory=$solrDirectory"
  cd "$solrDirectory"
  bin/solr zk upconfig -n cord -d "/export/solr/node$hostnumber/configsets/cord/conf" -z "$hostlist"

  # Disabled: re-creating collections for indexes that already exist on disk.
  #reinstate databases already existing on /data
  # for f in `find /export/solr/node$hostnumber/data -name index`
  # do
  # name=$(cut -d/ -f3 <<<"${f}")
  # db=$(cut -d- -f1 <<<"${name}")
  # week=$(cut -d- -f3 <<<"${name}")
  # if [[ $((CurrentWeek)) < $((week)) ]]
  # then
  # #happy new year!
  # collectionName=$db$LastYear$week
  # else
  # collectionName=$db$CurrentYear$week
  # fi
  # echo collection: $collectionName
  # echo "bin/solr create_collection -c $collectionName -n $name -shards 4 -replicationFactor 1"
  # bin/solr create_collection -c $collectionName -n $name -shards 4 -replicationFactor 1
  # done

fi

46 changes: 46 additions & 0 deletions bin/deploySolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# deploySolrCloud.sh
#
# Downloads and unpacks Solr 8.5.2 in the current directory, then builds this
# host's "cord" configset and Solr home under /export/solr/node<hostnumber>.
# Hosts whose number is below 4 additionally get ZooKeeper 3.6.1 unpacked and
# configured.
#
# <hostnumber> is the two characters at offset 5 of the hostname and must be
# a decimal number.
#
# The generated zoo.cfg and the DataImportHandler <lib> entry hard-code
# /home/ralphlevan as the installation directory, so the script is expected
# to be run from there.
#
# NOTE(review): both archives are fetched over plain HTTP and no checksum or
# signature is verified.
set -x
set -e

# Print a message on stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the one directory directly under the current directory whose name
# matches glob $1. Fails when nothing matches or when several entries match:
# an empty value would make a later `cd` jump to $HOME, and a multi-line
# value is not a usable path.
find_single_dir() {
  local matches
  matches=$(find . -maxdepth 1 -type d -name "$1") || return 1
  if [[ -z $matches || $matches == *$'\n'* ]]; then
    echo "expected exactly one '$1' directory in $PWD, found: '$matches'" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, World!"
hostname=$(hostname)
echo "$hostname"
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

# Every path below embeds the host number and the ZooKeeper decision compares
# it numerically, so refuse to continue with an empty or non-numeric value.
[[ $hostnumber =~ ^[0-9]+$ ]] ||
  die "cannot derive a numeric host number from hostname '$hostname'"

nodeDirectory=/export/solr/node$hostnumber
confDirectory=$nodeDirectory/configsets/cord/conf

wget http://apache.mirrors.hoobly.com/lucene/solr/8.5.2/solr-8.5.2.tgz -O solr-8.5.2.tgz
# Extract exactly the archive that was just downloaded rather than whatever
# happens to match a glob in this directory.
tar --overwrite -zxf solr-8.5.2.tgz
solrDirectory=$(find_single_dir "solr*")
echo "solrDirectory=$solrDirectory"
cd "$solrDirectory"
#sed -i.bak 's|<dataDir>${solr.data.dir:}</dataDir>|<dataDir>/data</dataDir>|' $solrDirectory/server/solr/configsets/_default/conf/solrconfig.xml
sed -i.bak 's|#SOLR_JAVA_MEM="-Xms512m -Xmx512m"|SOLR_JAVA_MEM="-Xms3g -Xmx3g"|' bin/solr.in.sh

# Assemble the "cord" configset from the stock _default configset plus the
# project's own DIH configuration and schema.
mkdir -p "$confDirectory"
cp server/solr/configsets/_default/conf/solrconfig.xml "$confDirectory/"
cp /export/coredir/conf/DIHconfigfile.xml "$confDirectory/"
# This substitution only has an effect if solrconfig.xml contains the literal
# text <dataDir>/data</dataDir>; the sed that would have produced that text is
# commented out above.
# NOTE(review): confirm whether this step is still expected to match.
sed -i.bak "s|<dataDir>/data</dataDir>|<dataDir>$nodeDirectory/data</dataDir>|" "$confDirectory/solrconfig.xml"
# Insert the DataImportHandler <lib> and <requestHandler> elements in front of
# the "<!-- SearchHandler" comment. The expression is single-quoted because it
# contains no shell variables, and the dot is written as \\. so that sed emits
# a literal "\." in the regex attribute (a lone "\." in a sed replacement is
# reduced to ".").
sed -i.bak 's| <!-- SearchHandler| <lib dir="${solr.install.dir:/home/ralphlevan/solr-8.5.2}/dist/" regex="solr-dataimporthandler-.*\\.jar"/>\n <requestHandler class="solr.DataImportHandler" name="/dataimport">\n <lst name="defaults">\n <str name="config">DIHconfigfile.xml</str>\n </lst>\n</requestHandler>\n <!-- SearchHandler|' "$confDirectory/solrconfig.xml"
cp /export/conf/cord-managed-schema "$confDirectory/managed-schema"
cp -rp server/solr/configsets/_default/conf/lang/ "$confDirectory/"
cp server/solr/configsets/_default/conf/*.txt "$confDirectory/"
mkdir -p "$nodeDirectory/data"
cp server/solr/solr.xml "$nodeDirectory/data"

# JDBC driver placed in the Solr webapp's library directory.
wget https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.30.1/sqlite-jdbc-3.30.1.jar -O server/solr-webapp/webapp/WEB-INF/lib/sqlite-jdbc-3.30.1.jar
#mv Solr-LCCN-plugin*.jar $solrDirectory/server/solr-webapp/webapp/WEB-INF/lib
cd ..


#only the first three hosts get zookeepers
# 10# forces base 10 so a zero-padded value such as "08" is not read as octal.
if (( 10#$hostnumber < 4 )); then
  wget http://apache.mirrors.hoobly.com/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz -O apache-zookeeper-3.6.1-bin.tar.gz
  tar --overwrite -zxf apache-zookeeper-3.6.1-bin.tar.gz
  zookeeperDirectory=$(find_single_dir "apache-zookeeper*")
  echo "zookeeperDirectory=$zookeeperDirectory"
  mkdir -p "$zookeeperDirectory/data"
  # The host number doubles as this server's ZooKeeper id.
  echo "$hostnumber" >"$zookeeperDirectory/data/myid"
  mv "$zookeeperDirectory/conf/zoo_sample.cfg" "$zookeeperDirectory/conf/zoo.cfg"
  # Point dataDir at the directory created above and append the ensemble
  # member list right after it.
  sed -i.bak "s|dataDir=/tmp/zookeeper|dataDir=/home/ralphlevan/$zookeeperDirectory/data\nserver.1=solr-master:2888:3888\nserver.2=solr-worker:2888:3888|" "$zookeeperDirectory/conf/zoo.cfg"
fi
47 changes: 47 additions & 0 deletions bin/deploySolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# deploySolrNode.sh
#
# Downloads and unpacks Solr 8.5.2 in the current directory, then builds this
# host's "cord" configset and Solr home under /export/solr/node<hostnumber>.
# Hosts whose number is below 4 additionally get ZooKeeper 3.6.1 unpacked and
# configured.
#
# <hostnumber> is the two characters at offset 5 of the hostname and must be
# a decimal number.
#
# The generated zoo.cfg and the DataImportHandler <lib> entry hard-code
# /home/ralphlevan as the installation directory, so the script is expected
# to be run from there.
#
# NOTE(review): both archives are fetched over plain HTTP and no checksum or
# signature is verified.
set -x
set -e

# Print a message on stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the one directory directly under the current directory whose name
# matches glob $1. Fails when nothing matches or when several entries match:
# an empty value would make a later `cd` jump to $HOME, and a multi-line
# value is not a usable path.
find_single_dir() {
  local matches
  matches=$(find . -maxdepth 1 -type d -name "$1") || return 1
  if [[ -z $matches || $matches == *$'\n'* ]]; then
    echo "expected exactly one '$1' directory in $PWD, found: '$matches'" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, World!"
hostname=$(hostname)
echo "$hostname"
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

# Every path below embeds the host number and the ZooKeeper decision compares
# it numerically, so refuse to continue with an empty or non-numeric value.
[[ $hostnumber =~ ^[0-9]+$ ]] ||
  die "cannot derive a numeric host number from hostname '$hostname'"

nodeDirectory=/export/solr/node$hostnumber
confDirectory=$nodeDirectory/configsets/cord/conf

wget http://apache.mirrors.hoobly.com/lucene/solr/8.5.2/solr-8.5.2.tgz -O solr-8.5.2.tgz
# Extract exactly the archive that was just downloaded rather than whatever
# happens to match a glob in this directory.
tar --overwrite -zxf solr-8.5.2.tgz
solrDirectory=$(find_single_dir "solr*")
echo "solrDirectory=$solrDirectory"
cd "$solrDirectory"
#sed -i.bak 's|<dataDir>${solr.data.dir:}</dataDir>|<dataDir>/data</dataDir>|' $solrDirectory/server/solr/configsets/_default/conf/solrconfig.xml
sed -i.bak 's|#SOLR_JAVA_MEM="-Xms512m -Xmx512m"|SOLR_JAVA_MEM="-Xms3g -Xmx3g"|' bin/solr.in.sh

# Assemble the "cord" configset from the stock _default configset plus the
# project's own DIH configuration and schema.
mkdir -p "$confDirectory"
cp server/solr/configsets/_default/conf/solrconfig.xml "$confDirectory/"
cp /export/coredir/conf/DIHconfigfile.xml "$confDirectory/"
# This substitution only has an effect if solrconfig.xml contains the literal
# text <dataDir>/data</dataDir>; the sed that would have produced that text is
# commented out above.
# NOTE(review): confirm whether this step is still expected to match.
sed -i.bak "s|<dataDir>/data</dataDir>|<dataDir>$nodeDirectory/data</dataDir>|" "$confDirectory/solrconfig.xml"
# Insert the DataImportHandler <lib> and <requestHandler> elements in front of
# the "<!-- SearchHandler" comment. The expression is single-quoted because it
# contains no shell variables, and the dot is written as \\. so that sed emits
# a literal "\." in the regex attribute (a lone "\." in a sed replacement is
# reduced to ".").
sed -i.bak 's| <!-- SearchHandler| <lib dir="${solr.install.dir:/home/ralphlevan/solr-8.5.2}/dist/" regex="solr-dataimporthandler-.*\\.jar"/>\n <requestHandler class="solr.DataImportHandler" name="/dataimport">\n <lst name="defaults">\n <str name="config">DIHconfigfile.xml</str>\n </lst>\n</requestHandler>\n <!-- SearchHandler|' "$confDirectory/solrconfig.xml"
cp /export/conf/cord-managed-schema "$confDirectory/managed-schema"
cp -rp server/solr/configsets/_default/conf/lang/ "$confDirectory/"
cp server/solr/configsets/_default/conf/*.txt "$confDirectory/"
mkdir -p "$nodeDirectory/data"
cp server/solr/solr.xml "$nodeDirectory/data"

# JDBC driver placed in the Solr webapp's library directory.
wget https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.30.1/sqlite-jdbc-3.30.1.jar -O server/solr-webapp/webapp/WEB-INF/lib/sqlite-jdbc-3.30.1.jar
#mv Solr-LCCN-plugin*.jar $solrDirectory/server/solr-webapp/webapp/WEB-INF/lib
cd ..


#only the first three hosts get zookeepers
# 10# forces base 10 so a zero-padded value such as "08" is not read as octal.
if (( 10#$hostnumber < 4 )); then
  wget http://apache.mirrors.hoobly.com/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz -O apache-zookeeper-3.6.1-bin.tar.gz
  tar --overwrite -zxf apache-zookeeper-3.6.1-bin.tar.gz
  zookeeperDirectory=$(find_single_dir "apache-zookeeper*")
  echo "zookeeperDirectory=$zookeeperDirectory"
  mkdir -p "$zookeeperDirectory/data"
  # The host number doubles as this server's ZooKeeper id.
  echo "$hostnumber" >"$zookeeperDirectory/data/myid"
  mv "$zookeeperDirectory/conf/zoo_sample.cfg" "$zookeeperDirectory/conf/zoo.cfg"
  # Point dataDir at the directory created above and append the ensemble
  # member list right after it.
  sed -i.bak "s|dataDir=/tmp/zookeeper|dataDir=/home/ralphlevan/$zookeeperDirectory/data\nserver.1=solr-master:2888:3888\nserver.2=solr-worker:2888:3888|" "$zookeeperDirectory/conf/zoo.cfg"
fi

Empty file added bin/solrCloudHostlist.txt
Empty file.
71 changes: 71 additions & 0 deletions bin/solrfields.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
#
# Defines the schema of the "cord" Solr collection through the Schema API on
# localhost: the regular fields, the string-typed facet_* fields, and a
# copy-field rule that feeds every field into _text_.
INDEX=http://localhost:8983/solr/cord/schema

# POST one JSON command document ($1) to the schema endpoint.
post_schema() {
  curl -X POST -H 'Content-type:application/json' --data-binary "$1" "$INDEX"
}

# Regular fields, one per entry: name type multiValued stored
regular_fields=(
  'carrels text_general true true'
  'authors text_general true true'
  'keywords text_general true true'
  'sources text_general true true'
  'urls text_general true true'
  'title text_general false true'
  'date text_general false true'
  'year pint false true'
  'journal text_general false true'
  'source text_general false true'
  'abstract text_general false true'
  'license text_general false true'
  'pdf_json string false false'
  'pmc_json string false false'
  'sha string false false'
  'doi string false false'
  'arxiv_id string false true'
  'cord_uid string false true'
  'mag_id string false false'
  'pmc_id string false true'
  'pubmed_id string false true'
  'who_id string false false'
  'fulltext text_general false false'
  'entity text_general true true'
  'type text_general true true'
)

# Facet fields, one per entry: name multiValued
# Each becomes a stored string field called facet_<name> with term
# frequencies and positions omitted.
facet_fields=(
  'authors true'
  'journal false'
  'sources true'
  'urls true'
  'keywords true'
  'license false'
  'entity true'
  'type true'
)

for entry in "${regular_fields[@]}"; do
  read -r field_name field_type multi_valued is_stored <<<"$entry"
  echo "adding field: $field_name"
  post_schema "$(printf '{"add-field": {"name":"%s", "type":"%s", "multiValued":%s, "stored":%s}}' \
    "$field_name" "$field_type" "$multi_valued" "$is_stored")"
done

for entry in "${facet_fields[@]}"; do
  read -r field_name multi_valued <<<"$entry"
  echo "adding facet: $field_name"
  post_schema "$(printf '{"add-field": {"name":"facet_%s", "type":"string", "multiValued":%s, "stored":true, "omitTermFreqAndPositions":true}}' \
    "$field_name" "$multi_valued")"
done

echo "adding catch-all field: _text_"
post_schema '{"add-copy-field" : {"source":"*","dest":"_text_"}}'
1 change: 1 addition & 0 deletions bin/startSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env bash
#
# Runs /export/coredir/startSolrNode.sh once on every host listed in
# /export/coredir/solrCloudHostlist.txt using GNU parallel.
# --nonall runs the command on each listed host without arguments and --tag
# prefixes every output line so it can be attributed to its host.
parallel --tag --nonall --slf /export/coredir/solrCloudHostlist.txt "/export/coredir/startSolrNode.sh"
30 changes: 30 additions & 0 deletions bin/startSolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# startSolrNode.sh
#
# Starts this host's Solr node in cloud mode against the ZooKeeper ensemble
# in $hostlist, using /export/solr/node<hostnumber>/data as the Solr home.
# On hosts whose number is below 4 the local ZooKeeper server is started
# first.
#
# <hostnumber> is the two characters at offset 5 of the hostname and must be
# a decimal number.
set -e
set -x

# Print a message on stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the one directory directly under the current directory whose name
# matches glob $1. Fails when nothing matches or when several entries match:
# an empty value would make a later `cd` jump to $HOME, and a multi-line
# value is not a usable path.
find_single_dir() {
  local matches
  matches=$(find . -maxdepth 1 -type d -name "$1") || return 1
  if [[ -z $matches || $matches == *$'\n'* ]]; then
    echo "expected exactly one '$1' directory in $PWD, found: '$matches'" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

hostname=$(hostname)
echo "$hostname"
hostnumber=${hostname:5:2}
hostlist="solr-master:2181,solr-worker:2181"
echo "hostnumber=$hostnumber"
export JAVA_HOME=/export/java

# The Solr home path embeds the host number and the ZooKeeper decision
# compares it numerically, so refuse an empty or non-numeric value.
[[ $hostnumber =~ ^[0-9]+$ ]] ||
  die "cannot derive a numeric host number from hostname '$hostname'"

# 10# forces base 10 so a zero-padded value such as "08" is not read as octal.
if (( 10#$hostnumber < 4 )); then
  echo "Hello, Zookeeper!"
  cd /home/ralphlevan/
  zookeeperDirectory=$(find_single_dir "apache-zookeeper*")
  echo "zookeeperDirectory=$zookeeperDirectory"
  cd "$zookeeperDirectory"
  bin/zkServer.sh start
fi

echo "Hello, Solr!"
cd /home/ralphlevan/
solrDirectory=$(find_single_dir "solr*")
echo "solrDirectory=$solrDirectory"
#$solrDirectory/bin/solr -c -z $hostlist -s /prod/data/solr
cd "$solrDirectory"
#bin/solr start -c -z $hostlist -s /prod/viafsolrcloud/prod/solrcloud/$solrDirectory/server/solr
#cp /prod/viafsolrcloud/prod/solrcloud/$solrDirectory/server/solr/solr.xml /data
bin/solr start -c -z "$hostlist" -s "/export/solr/node$hostnumber/data" -DzkClientTimeout=600000

1 change: 1 addition & 0 deletions bin/stopSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env bash
#
# Runs /export/coredir/stopSolrNode.sh once on every host listed in
# /export/coredir/solrCloudHostlist.txt using GNU parallel.
# --nonall runs the command on each listed host without arguments and --tag
# prefixes every output line so it can be attributed to its host.
parallel --tag --nonall --slf /export/coredir/solrCloudHostlist.txt "/export/coredir/stopSolrNode.sh"
26 changes: 26 additions & 0 deletions bin/stopSolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# stopSolrNode.sh
#
# Stops Solr on this host (`bin/solr stop -all`) and then, on hosts whose
# number is below 4, stops the local ZooKeeper server.
#
# The host number is the two characters at offset 5 of the hostname and must
# be a decimal number. It is only checked after Solr has been stopped, so a
# bad hostname never prevents the Solr shutdown.
set -e

# Print a message on stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the one directory directly under the current directory whose name
# matches glob $1. Fails when nothing matches or when several entries match:
# an empty value would make a later `cd` jump to $HOME, and a multi-line
# value is not a usable path.
find_single_dir() {
  local matches
  matches=$(find . -maxdepth 1 -type d -name "$1") || return 1
  if [[ -z $matches || $matches == *$'\n'* ]]; then
    echo "expected exactly one '$1' directory in $PWD, found: '$matches'" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, Solr!"
cd /home/ralphlevan/

solrDirectory=$(find_single_dir "solr*")
echo "solrDirectory=$solrDirectory"
export JAVA_HOME=/export/java
cd "$solrDirectory"
bin/solr stop -all

hostname=$(hostname)
echo "$hostname"
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

# The ZooKeeper decision compares the host number numerically; abort with a
# clear message instead of a test syntax error on an empty or non-numeric
# value.
[[ $hostnumber =~ ^[0-9]+$ ]] ||
  die "cannot derive a numeric host number from hostname '$hostname'"

# 10# forces base 10 so a zero-padded value such as "08" is not read as octal.
if (( 10#$hostnumber < 4 )); then
  echo "Hello, Zookeeper!"
  cd /home/ralphlevan/
  zookeeperDirectory=$(find_single_dir "apache-zookeeper*")
  echo "zookeeperDirectory=$zookeeperDirectory"
  cd "$zookeeperDirectory"
  bin/zkServer.sh stop
fi

11 changes: 11 additions & 0 deletions bin/testSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
#
# Requests http://<host>:8983/solr from every host listed in
# /export/coredir/solrCloudHostlist.txt (one host per line) and prints the
# host name followed by "success" when the first line of the response
# contains "html", or "failure" plus that first line otherwise.

# `read -r` keeps backslashes literal while still trimming surrounding
# whitespace; the `|| [[ -n $host ]]` clause also handles a last line that
# has no trailing newline.
while read -r host || [[ -n $host ]]; do
  # The same file is handed to GNU parallel as its sshlogin file, which
  # ignores empty lines and lines starting with '#'; skip them here too
  # instead of probing an empty host name.
  if [[ -z $host || $host == '#'* ]]; then
    continue
  fi
  echo "$host"
  result=$(wget -qO- "http://$host:8983/solr" | head -n 1)
  if grep -q html <<<"$result"; then
    echo success
  else
    echo failure
    # Quoted so the response is printed verbatim and never glob-expanded.
    echo "response: $result"
  fi
done </export/coredir/solrCloudHostlist.txt
Loading

0 comments on commit 7dce185

Please sign in to comment.