Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding SolrCloud scripts and configuration files #116

Merged
merged 5 commits into from
Jun 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions bin/createNewCollections.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
#
# Uploads the "cord" configset to ZooKeeper from a single host.
#
# The work is done only on the host whose host number equals 1; every other
# host falls through the `if` and does nothing. The collection-creation loop
# at the bottom is commented out, so despite the "Creating Collections!"
# banner no collection is created by this script as written.
set -e
set -x

# Prints the one directory directly under the current directory whose name
# matches the glob $1. Returns non-zero, with a message on stderr, when there
# are zero or several matches, so the caller never `cd`s to an ambiguous or
# empty path.
find_single_dir() {
  local pattern=$1 matches
  # errexit is cleared inside command substitutions, so propagate explicitly.
  matches=$(find . -maxdepth 1 -type d -name "$pattern") || return
  if [[ -z "$matches" || "$matches" == *$'\n'* ]]; then
    printf 'expected exactly one directory matching "%s" in %s, found: %s\n' \
      "$pattern" "$PWD" "${matches:-none}" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

hostname=$(hostname)
echo "$hostname"
# Two characters starting at offset 5 of the host name.
# NOTE(review): presumably a numeric node id embedded in the host name --
# confirm against the cluster's naming scheme.
hostnumber=${hostname:5:2}
hostlist="solr-master:2181,solr-worker:2181"
echo "hostnumber=$hostnumber"

#only do this on one server. The zookeepers will pass it around
if [ "$hostnumber" -eq 1 ]; then
  export JAVA_HOME=/export/java
  echo "Creating Collections!"
  cd /home/ralphlevan/
  solrDirectory=$(find_single_dir 'solr*')
  echo "solrDirectory=$solrDirectory"
  cd "$solrDirectory"
  bin/solr zk upconfig -n cord -d "/export/solr/node$hostnumber/configsets/cord/conf" -z "$hostlist"

  #reinstate databases already existing on /data
  # for f in `find /export/solr/node$hostnumber/data -name index`
  # do
  # name=$(cut -d/ -f3 <<<"${f}")
  # db=$(cut -d- -f1 <<<"${name}")
  # week=$(cut -d- -f3 <<<"${name}")
  # if [[ $((CurrentWeek)) < $((week)) ]]
  # then
  # #happy new year!
  # collectionName=$db$LastYear$week
  # else
  # collectionName=$db$CurrentYear$week
  # fi
  # echo collection: $collectionName
  # echo "bin/solr create_collection -c $collectionName -n $name -shards 4 -replicationFactor 1"
  # bin/solr create_collection -c $collectionName -n $name -shards 4 -replicationFactor 1
  # done

fi

46 changes: 46 additions & 0 deletions bin/deploySolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Installs Solr 8.5.2 into the current directory and builds this node's
# "cord" configset and data directory under /export/solr/node<hostnumber>.
# Hosts whose host number is below 4 additionally get ZooKeeper 3.6.1
# unpacked and configured next to Solr.
#
# Relies on GNU tar (--overwrite) and GNU sed (-i with a suffix, "\n" in the
# replacement text).
set -x
set -e

# Prints the one directory directly under the current directory whose name
# matches the glob $1. Returns non-zero, with a message on stderr, when there
# are zero or several matches (e.g. an older release is still unpacked), so
# the caller never `cd`s to an ambiguous or empty path.
find_single_dir() {
  local pattern=$1 matches
  # errexit is cleared inside command substitutions, so propagate explicitly.
  matches=$(find . -maxdepth 1 -type d -name "$pattern") || return
  if [[ -z "$matches" || "$matches" == *$'\n'* ]]; then
    printf 'expected exactly one directory matching "%s" in %s, found: %s\n' \
      "$pattern" "$PWD" "${matches:-none}" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, World!"
hostname=$(hostname)
echo "$hostname"
# Two characters starting at offset 5 of the host name.
# NOTE(review): presumably a numeric node id embedded in the host name --
# confirm against the cluster's naming scheme.
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

solr_archive=solr-8.5.2.tgz
conf_dir=/export/solr/node$hostnumber/configsets/cord/conf
data_dir=/export/solr/node$hostnumber/data

# NOTE(review): plain-HTTP download with no checksum or signature check.
wget http://apache.mirrors.hoobly.com/lucene/solr/8.5.2/solr-8.5.2.tgz -O "$solr_archive"
# Unpack exactly the archive fetched above; a glob here would hand tar extra
# arguments if another solr*tgz were lying around.
tar --overwrite -zxf "$solr_archive"
solrDirectory=$(find_single_dir 'solr*')
echo "solrDirectory=$solrDirectory"
cd "$solrDirectory"
#sed -i.bak 's|<dataDir>${solr.data.dir:}</dataDir>|<dataDir>/data</dataDir>|' $solrDirectory/server/solr/configsets/_default/conf/solrconfig.xml
# Uncomment the heap setting and raise it from 512 MB to 3 GB.
sed -i.bak 's|#SOLR_JAVA_MEM="-Xms512m -Xmx512m"|SOLR_JAVA_MEM="-Xms3g -Xmx3g"|' bin/solr.in.sh

# Assemble the "cord" configset from the stock _default one plus local files.
mkdir -p "$conf_dir"
cp server/solr/configsets/_default/conf/solrconfig.xml "$conf_dir/"
cp /export/coredir/conf/DIHconfigfile.xml "$conf_dir/"
# NOTE(review): the only visible step that would produce
# "<dataDir>/data</dataDir>" is the commented-out sed above, so this
# substitution may match nothing -- confirm whether that is intended.
sed -i.bak "s|<dataDir>/data</dataDir>|<dataDir>$data_dir</dataDir>|" "$conf_dir/solrconfig.xml"
# Insert the DataImportHandler <lib> and /dataimport request handler in front
# of the "<!-- SearchHandler" comment.
sed -i.bak "s| <!-- SearchHandler| <lib dir=\"\${solr.install.dir:/home/ralphlevan/solr-8.5.2}/dist/\" regex=\"solr-dataimporthandler-.*\.jar\"/>\n <requestHandler class=\"solr.DataImportHandler\" name=\"/dataimport\">\n <lst name=\"defaults\">\n <str name=\"config\">DIHconfigfile.xml</str>\n </lst>\n</requestHandler>\n <!-- SearchHandler|" "$conf_dir/solrconfig.xml"
cp /export/conf/cord-managed-schema "$conf_dir/managed-schema"
cp -rp server/solr/configsets/_default/conf/lang/ "$conf_dir/"
cp server/solr/configsets/_default/conf/*.txt "$conf_dir/"
mkdir -p "$data_dir"
cp server/solr/solr.xml "$data_dir"

# JDBC driver placed in the webapp's lib directory.
wget https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.30.1/sqlite-jdbc-3.30.1.jar -O server/solr-webapp/webapp/WEB-INF/lib/sqlite-jdbc-3.30.1.jar
#mv Solr-LCCN-plugin*.jar $solrDirectory/server/solr-webapp/webapp/WEB-INF/lib
cd ..


#only the first three hosts get zookeepers
# NOTE(review): the ensemble written below lists only server.1 and server.2,
# while this branch admits host numbers up to 3 -- confirm.
if [ "$hostnumber" -lt 4 ]; then
  zookeeper_archive=apache-zookeeper-3.6.1-bin.tar.gz
  wget http://apache.mirrors.hoobly.com/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz -O "$zookeeper_archive"
  tar --overwrite -zxf "$zookeeper_archive"
  zookeeperDirectory=$(find_single_dir 'apache-zookeeper*')
  echo "zookeeperDirectory=$zookeeperDirectory"
  mkdir -p "$zookeeperDirectory/data"
  # The host number doubles as this server's ZooKeeper id.
  echo "$hostnumber" >"$zookeeperDirectory/data/myid"
  mv "$zookeeperDirectory/conf/zoo_sample.cfg" "$zookeeperDirectory/conf/zoo.cfg"
  # Point dataDir at the directory holding myid and append the server list.
  # The absolute prefix assumes the script was run from /home/ralphlevan.
  sed -i.bak "s|dataDir=/tmp/zookeeper|dataDir=/home/ralphlevan/$zookeeperDirectory/data\nserver.1=solr-master:2888:3888\nserver.2=solr-worker:2888:3888|" "$zookeeperDirectory/conf/zoo.cfg"
fi
47 changes: 47 additions & 0 deletions bin/deploySolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# Installs Solr 8.5.2 into the current directory and builds this node's
# "cord" configset and data directory under /export/solr/node<hostnumber>.
# Hosts whose host number is below 4 additionally get ZooKeeper 3.6.1
# unpacked and configured next to Solr.
#
# Relies on GNU tar (--overwrite) and GNU sed (-i with a suffix, "\n" in the
# replacement text).
set -x
set -e

# Prints the one directory directly under the current directory whose name
# matches the glob $1. Returns non-zero, with a message on stderr, when there
# are zero or several matches (e.g. an older release is still unpacked), so
# the caller never `cd`s to an ambiguous or empty path.
find_single_dir() {
  local pattern=$1 matches
  # errexit is cleared inside command substitutions, so propagate explicitly.
  matches=$(find . -maxdepth 1 -type d -name "$pattern") || return
  if [[ -z "$matches" || "$matches" == *$'\n'* ]]; then
    printf 'expected exactly one directory matching "%s" in %s, found: %s\n' \
      "$pattern" "$PWD" "${matches:-none}" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, World!"
hostname=$(hostname)
echo "$hostname"
# Two characters starting at offset 5 of the host name.
# NOTE(review): presumably a numeric node id embedded in the host name --
# confirm against the cluster's naming scheme.
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

solr_archive=solr-8.5.2.tgz
conf_dir=/export/solr/node$hostnumber/configsets/cord/conf
data_dir=/export/solr/node$hostnumber/data

# NOTE(review): plain-HTTP download with no checksum or signature check.
wget http://apache.mirrors.hoobly.com/lucene/solr/8.5.2/solr-8.5.2.tgz -O "$solr_archive"
# Unpack exactly the archive fetched above; a glob here would hand tar extra
# arguments if another solr*tgz were lying around.
tar --overwrite -zxf "$solr_archive"
solrDirectory=$(find_single_dir 'solr*')
echo "solrDirectory=$solrDirectory"
cd "$solrDirectory"
#sed -i.bak 's|<dataDir>${solr.data.dir:}</dataDir>|<dataDir>/data</dataDir>|' $solrDirectory/server/solr/configsets/_default/conf/solrconfig.xml
# Uncomment the heap setting and raise it from 512 MB to 3 GB.
sed -i.bak 's|#SOLR_JAVA_MEM="-Xms512m -Xmx512m"|SOLR_JAVA_MEM="-Xms3g -Xmx3g"|' bin/solr.in.sh

# Assemble the "cord" configset from the stock _default one plus local files.
mkdir -p "$conf_dir"
cp server/solr/configsets/_default/conf/solrconfig.xml "$conf_dir/"
cp /export/coredir/conf/DIHconfigfile.xml "$conf_dir/"
# NOTE(review): the only visible step that would produce
# "<dataDir>/data</dataDir>" is the commented-out sed above, so this
# substitution may match nothing -- confirm whether that is intended.
sed -i.bak "s|<dataDir>/data</dataDir>|<dataDir>$data_dir</dataDir>|" "$conf_dir/solrconfig.xml"
# Insert the DataImportHandler <lib> and /dataimport request handler in front
# of the "<!-- SearchHandler" comment.
sed -i.bak "s| <!-- SearchHandler| <lib dir=\"\${solr.install.dir:/home/ralphlevan/solr-8.5.2}/dist/\" regex=\"solr-dataimporthandler-.*\.jar\"/>\n <requestHandler class=\"solr.DataImportHandler\" name=\"/dataimport\">\n <lst name=\"defaults\">\n <str name=\"config\">DIHconfigfile.xml</str>\n </lst>\n</requestHandler>\n <!-- SearchHandler|" "$conf_dir/solrconfig.xml"
cp /export/conf/cord-managed-schema "$conf_dir/managed-schema"
cp -rp server/solr/configsets/_default/conf/lang/ "$conf_dir/"
cp server/solr/configsets/_default/conf/*.txt "$conf_dir/"
mkdir -p "$data_dir"
cp server/solr/solr.xml "$data_dir"

# JDBC driver placed in the webapp's lib directory.
wget https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.30.1/sqlite-jdbc-3.30.1.jar -O server/solr-webapp/webapp/WEB-INF/lib/sqlite-jdbc-3.30.1.jar
#mv Solr-LCCN-plugin*.jar $solrDirectory/server/solr-webapp/webapp/WEB-INF/lib
cd ..


#only the first three hosts get zookeepers
# NOTE(review): the ensemble written below lists only server.1 and server.2,
# while this branch admits host numbers up to 3 -- confirm.
if [ "$hostnumber" -lt 4 ]; then
  zookeeper_archive=apache-zookeeper-3.6.1-bin.tar.gz
  wget http://apache.mirrors.hoobly.com/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz -O "$zookeeper_archive"
  tar --overwrite -zxf "$zookeeper_archive"
  zookeeperDirectory=$(find_single_dir 'apache-zookeeper*')
  echo "zookeeperDirectory=$zookeeperDirectory"
  mkdir -p "$zookeeperDirectory/data"
  # The host number doubles as this server's ZooKeeper id.
  echo "$hostnumber" >"$zookeeperDirectory/data/myid"
  mv "$zookeeperDirectory/conf/zoo_sample.cfg" "$zookeeperDirectory/conf/zoo.cfg"
  # Point dataDir at the directory holding myid and append the server list.
  # The absolute prefix assumes the script was run from /home/ralphlevan.
  sed -i.bak "s|dataDir=/tmp/zookeeper|dataDir=/home/ralphlevan/$zookeeperDirectory/data\nserver.1=solr-master:2888:3888\nserver.2=solr-worker:2888:3888|" "$zookeeperDirectory/conf/zoo.cfg"
fi

Empty file added bin/solrCloudHostlist.txt
Empty file.
71 changes: 71 additions & 0 deletions bin/solrfields.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
#
# Defines the fields of the "cord" collection by POSTing JSON commands to the
# Solr schema endpoint in $INDEX. curl prints Solr's reply to each command.
# A failed request does not stop the script; the remaining commands are still
# sent.

INDEX=http://localhost:8983/solr/cord/schema

# Prints the add-field JSON command for one field.
#   $1  field name
#   $2  field type
#   $3  multiValued: true or false
#   $4  stored: true or false
#   $5  optional extra attributes appended verbatim after "stored"
#       (must start with ', ')
add_field_payload() {
  printf '{"add-field": {"name":"%s", "type":"%s", "multiValued":%s, "stored":%s%s}}' \
    "$1" "$2" "$3" "$4" "${5:-}"
}

# POSTs the JSON command in $1 to the schema endpoint.
post_schema_command() {
  curl -X POST -H 'Content-type:application/json' --data-binary "$1" "$INDEX"
}

# Regular fields, one per line: name type multiValued stored.
# The table is read on fd 3 so the loop body's stdin is left alone.
while read -r name field_type multi stored <&3; do
  echo "adding field: $name"
  post_schema_command "$(add_field_payload "$name" "$field_type" "$multi" "$stored")"
done 3<<'EOF'
carrels text_general true true
authors text_general true true
keywords text_general true true
sources text_general true true
urls text_general true true
title text_general false true
date text_general false true
year pint false true
journal text_general false true
source text_general false true
abstract text_general false true
license text_general false true
pdf_json string false false
pmc_json string false false
sha string false false
doi string false false
arxiv_id string false true
cord_uid string false true
mag_id string false false
pmc_id string false true
pubmed_id string false true
who_id string false false
fulltext text_general false false
entity text_general true true
type text_general true true
EOF

# Facet fields, one per line: base-name multiValued. Each becomes a stored
# "string" field named facet_<base-name> with omitTermFreqAndPositions set.
while read -r name multi <&3; do
  echo "adding facet: $name"
  post_schema_command "$(add_field_payload "facet_$name" string "$multi" true ', "omitTermFreqAndPositions":true')"
done 3<<'EOF'
authors true
journal false
sources true
urls true
keywords true
license false
entity true
type true
EOF

echo "adding catch-all field: _text_"
post_schema_command '{"add-copy-field" : {"source":"*","dest":"_text_"}}'
1 change: 1 addition & 0 deletions bin/startSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env bash
# Starts the whole cluster: GNU parallel runs startSolrNode.sh once on every
# host listed in the ssh login file (--slf), passing it no arguments
# (--nonall), and prefixes each line of output with the host it came from
# (--tag).
parallel --tag --nonall --slf /export/coredir/solrCloudHostlist.txt "/export/coredir/startSolrNode.sh"
30 changes: 30 additions & 0 deletions bin/startSolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Starts the services for this host: ZooKeeper first (only when the host
# number is below 4), then Solr in cloud mode pointed at the ZooKeeper hosts
# in $hostlist, with /export/solr/node<hostnumber>/data as its -s directory.
set -e
set -x

# Prints the one directory directly under the current directory whose name
# matches the glob $1. Returns non-zero, with a message on stderr, when there
# are zero or several matches, so the caller never `cd`s to an ambiguous or
# empty path.
find_single_dir() {
  local pattern=$1 matches
  # errexit is cleared inside command substitutions, so propagate explicitly.
  matches=$(find . -maxdepth 1 -type d -name "$pattern") || return
  if [[ -z "$matches" || "$matches" == *$'\n'* ]]; then
    printf 'expected exactly one directory matching "%s" in %s, found: %s\n' \
      "$pattern" "$PWD" "${matches:-none}" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

hostname=$(hostname)
echo "$hostname"
# Two characters starting at offset 5 of the host name.
# NOTE(review): presumably a numeric node id embedded in the host name --
# confirm against the cluster's naming scheme.
hostnumber=${hostname:5:2}
hostlist="solr-master:2181,solr-worker:2181"
echo "hostnumber=$hostnumber"
export JAVA_HOME=/export/java

if [ "$hostnumber" -lt 4 ]; then
  echo "Hello, Zookeeper!"
  cd /home/ralphlevan/
  zookeeperDirectory=$(find_single_dir 'apache-zookeeper*')
  echo "zookeeperDirectory=$zookeeperDirectory"
  cd "$zookeeperDirectory"
  bin/zkServer.sh start
fi

echo "Hello, Solr!"
cd /home/ralphlevan/
solrDirectory=$(find_single_dir 'solr*')
echo "solrDirectory=$solrDirectory"
#$solrDirectory/bin/solr -c -z $hostlist -s /prod/data/solr
cd "$solrDirectory"
#bin/solr start -c -z $hostlist -s /prod/viafsolrcloud/prod/solrcloud/$solrDirectory/server/solr
#cp /prod/viafsolrcloud/prod/solrcloud/$solrDirectory/server/solr/solr.xml /data
bin/solr start -c -z "$hostlist" -s "/export/solr/node$hostnumber/data" -DzkClientTimeout=600000

1 change: 1 addition & 0 deletions bin/stopSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env bash
# Stops the whole cluster: GNU parallel runs stopSolrNode.sh once on every
# host listed in the ssh login file (--slf), passing it no arguments
# (--nonall), and prefixes each line of output with the host it came from
# (--tag).
parallel --tag --nonall --slf /export/coredir/solrCloudHostlist.txt "/export/coredir/stopSolrNode.sh"
26 changes: 26 additions & 0 deletions bin/stopSolrNode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Stops the services for this host: every Solr instance first
# (`bin/solr stop -all`), then ZooKeeper when the host number is below 4.
set -e

# Prints the one directory directly under the current directory whose name
# matches the glob $1. Returns non-zero, with a message on stderr, when there
# are zero or several matches, so the caller never `cd`s to an ambiguous or
# empty path.
find_single_dir() {
  local pattern=$1 matches
  # errexit is cleared inside command substitutions, so propagate explicitly.
  matches=$(find . -maxdepth 1 -type d -name "$pattern") || return
  if [[ -z "$matches" || "$matches" == *$'\n'* ]]; then
    printf 'expected exactly one directory matching "%s" in %s, found: %s\n' \
      "$pattern" "$PWD" "${matches:-none}" >&2
    return 1
  fi
  printf '%s\n' "$matches"
}

echo "Hello, Solr!"
cd /home/ralphlevan/

solrDirectory=$(find_single_dir 'solr*')
echo "solrDirectory=$solrDirectory"
export JAVA_HOME=/export/java
cd "$solrDirectory"
bin/solr stop -all

hostname=$(hostname)
echo "$hostname"
# Two characters starting at offset 5 of the host name.
# NOTE(review): presumably a numeric node id embedded in the host name --
# confirm against the cluster's naming scheme.
hostnumber=${hostname:5:2}
echo "hostnumber=$hostnumber"

if [ "$hostnumber" -lt 4 ]; then
  echo "Hello, Zookeeper!"
  cd /home/ralphlevan/
  zookeeperDirectory=$(find_single_dir 'apache-zookeeper*')
  echo "zookeeperDirectory=$zookeeperDirectory"
  cd "$zookeeperDirectory"
  bin/zkServer.sh stop
fi

11 changes: 11 additions & 0 deletions bin/testSolrCloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Smoke-tests every host in the cluster host list: fetches
# http://<host>:8983/solr and reports "success" when the first line of the
# reply contains "html", otherwise "failure" plus that first line.
#
# The host list is the same file the start/stop scripts hand to GNU parallel
# as its ssh login file; parallel ignores empty lines and lines starting with
# '#' there, so they are skipped here as well instead of being probed as
# hosts.
while read -r host || [[ -n "$host" ]]; do # also take a last line lacking a newline
  if [[ -z "$host" || "$host" == '#'* ]]; then
    continue
  fi
  echo "$host"
  result=$(wget -qO- "http://$host:8983/solr" | head -1)
  if grep -q html <<<"$result"; then
    echo success
  else
    echo failure
    # Quoted so the reply is printed as received, without glob expansion.
    printf 'response: %s\n' "$result"
  fi
done </export/coredir/solrCloudHostlist.txt
Loading