diff --git a/exception_lists/check_rtime b/exception_lists/check_rtime index 05eae9f23ab8..1de5d2d078c2 100644 --- a/exception_lists/check_rtime +++ b/exception_lists/check_rtime @@ -92,7 +92,6 @@ STAB ^platform/.*/MACH(kernel)/unix$ # Files that are allowed undefined references UNDEF_REF ^usr/lib/libnisdb\.so\.2$ -UNDEF_REF ^usr/snadm/lib/libsvm\.so\.1$ # Objects allowed to have unused dependencies UNUSED_DEPS ^usr/lib/picl/plugins/ # require devtree dependencies diff --git a/exception_lists/closed-bins b/exception_lists/closed-bins index 8a7ecbc0665b..044a2b8d81d8 100644 --- a/exception_lists/closed-bins +++ b/exception_lists/closed-bins @@ -40,6 +40,7 @@ ./usr/has ./usr/has/bin ./usr/has/bin/patch +./usr/include/sys/lvm ./usr/include/sys/pcmcia ./usr/include/sys/usb/clients/hwarc ./usr/include/sys/uwb diff --git a/exception_lists/packaging b/exception_lists/packaging index 5fba2183a2bd..54c545ecd466 100644 --- a/exception_lists/packaging +++ b/exception_lists/packaging @@ -23,9 +23,9 @@ # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2012 OmniTI Computer Consulting, Inc. All rights reserved. # Copyright 2014 Garrett D'Amore -# Copyright 2014 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Toomas Soome # Copyright 2016 Hans Rosenfeld +# Copyright 2016 Nexenta Systems, Inc. # # @@ -299,15 +299,6 @@ usr/include/sys/ontrap.h usr/include/sys/sysmsg_impl.h usr/include/sys/vlan.h # -# These files are installed in the proto area so lvm can use -# them during the build process. -# -lib/llib-lmeta -lib/llib-lmeta.ln -usr/include/sdssc.h -usr/lib/llib-lmeta -usr/lib/llib-lmeta.ln -# # non-public pci header # usr/include/sys/pci_impl.h @@ -716,9 +707,10 @@ usr/include/sys/kiconv_utf8_gb2312.h usr/include/sys/kiconv_utf8_hkscs.h usr/include/sys/kiconv_utf8_uhc.h # -# At this time, the ttydefs.cleanup file is only useful on sun4u systems +# At this time, the directory and its contents +# are only useful on sun4u systems # -etc/flash/postdeployment/ttydefs.cleanup i386 +etc/flash/postdeployment i386 # # This header file is shared only between the power commands and # ppm/srn modules # and should not be in any package diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 414b87b84a9b..cb1805e5c480 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -22,8 +22,9 @@ # # Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2015 Garrett D'Amore +# Copyright 2016 Nexenta Systems, Inc. +# # include global definitions include Makefile.master @@ -193,7 +194,6 @@ COMMON_SUBDIRS = \ cmd/logins \ cmd/ls \ cmd/luxadm \ - cmd/lvm \ cmd/mailwrapper \ cmd/makekey \ cmd/mdb \ @@ -450,7 +450,6 @@ COMMON_SUBDIRS = \ lib/libzfs_jni \ lib/libzonecfg \ lib/libzoneinfo \ - lib/lvm \ lib/madv \ lib/mpss \ lib/nametoaddr \ diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs index 13ac9e2798fa..2ef8bf9df174 100644 --- a/usr/src/Targetdirs +++ b/usr/src/Targetdirs @@ -26,8 +26,8 @@ # Copyright 2012 OmniTI Computer Consulting, Inc. All rights reserved. # Copyright (c) 2013 RackTop Systems. # Copyright 2014 Garrett D'Amore -# Copyright 2014 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Hans Rosenfeld +# Copyright 2016 Nexenta Systems, Inc. # # @@ -260,7 +260,6 @@ DIRS= \ /usr/lib/class/SDC \ /usr/lib/class/TS \ /usr/lib/crypto \ - /usr/lib/drv \ /usr/lib/elfedit \ /usr/lib/fm \ /usr/lib/font \ @@ -329,8 +328,6 @@ DIRS= \ /usr/share/lib/xml/dtd \ /usr/share/man \ /usr/share/src \ - /usr/snadm \ - /usr/snadm/lib \ /usr/ucb \ $(MACH32_DIRS) \ /usr/ucblib \ @@ -874,8 +871,6 @@ $(ROOT)/usr/lib/libmd.so.1:= REALPATH=../../lib/libmd.so.1 $(ROOT)/usr/lib/libmd.so:= REALPATH=../../lib/libmd.so.1 $(ROOT)/usr/lib/libmd5.so.1:= REALPATH=../../lib/libmd5.so.1 $(ROOT)/usr/lib/libmd5.so:= REALPATH=../../lib/libmd5.so.1 -$(ROOT)/usr/lib/libmeta.so.1:= REALPATH=../../lib/libmeta.so.1 -$(ROOT)/usr/lib/libmeta.so:= REALPATH=../../lib/libmeta.so.1 $(ROOT)/usr/lib/libmp.so.1:= REALPATH=../../lib/libmp.so.1 $(ROOT)/usr/lib/libmp.so.2:= REALPATH=../../lib/libmp.so.2 $(ROOT)/usr/lib/libmp.so:= REALPATH=../../lib/libmp.so.2 @@ -995,8 +990,6 @@ $(ROOT)/usr/lib/llib-lm:= REALPATH=../../lib/llib-lm $(ROOT)/usr/lib/llib-lm.ln:= REALPATH=../../lib/llib-lm.ln $(ROOT)/usr/lib/llib-lmd5.ln:= REALPATH=../../lib/llib-lmd5.ln $(ROOT)/usr/lib/llib-lmd5:= REALPATH=../../lib/llib-lmd5 -$(ROOT)/usr/lib/llib-lmeta.ln:= REALPATH=../../lib/llib-lmeta.ln -$(ROOT)/usr/lib/llib-lmeta:= REALPATH=../../lib/llib-lmeta $(ROOT)/usr/lib/llib-lnsl.ln:= REALPATH=../../lib/llib-lnsl.ln $(ROOT)/usr/lib/llib-lnsl:= REALPATH=../../lib/llib-lnsl $(ROOT)/usr/lib/llib-lnvpair.ln:= REALPATH=../../lib/llib-lnvpair.ln @@ -1505,8 +1498,6 @@ SYM.USRLIB= \ /usr/lib/libmd.so.1 \ /usr/lib/libmd5.so \ /usr/lib/libmd5.so.1 \ - /usr/lib/libmeta.so \ - /usr/lib/libmeta.so.1 \ /usr/lib/libmp.so \ /usr/lib/libmp.so.1 \ /usr/lib/libmp.so.2 \ @@ -1624,8 +1615,6 @@ SYM.USRLIB= \ /usr/lib/llib-lm.ln \ /usr/lib/llib-lmd5 \ /usr/lib/llib-lmd5.ln \ - /usr/lib/llib-lmeta \ - /usr/lib/llib-lmeta.ln \ /usr/lib/llib-lnsl \ /usr/lib/llib-lnsl.ln \ /usr/lib/llib-lnvpair \ diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 2d34c74bbe07..331fc0c1f865 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -19,13 +19,15 @@ # CDDL HEADER END # +# # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Joyent, Inc. # Copyright (c) 2012 by Delphix. All rights reserved. # Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved. # Copyright 2014 Garrett D'Amore # Copyright 2016 Toomas Soome +# Copyright 2016 Nexenta Systems, Inc. +# include ../Makefile.master @@ -248,7 +250,6 @@ COMMON_SUBDIRS= \ look \ ls \ luxadm \ - lvm \ mach \ mail \ mailwrapper \ @@ -619,7 +620,6 @@ MSGSUBDIRS= \ logins \ ls \ luxadm \ - lvm \ mailx \ make \ man \ diff --git a/usr/src/cmd/Makefile.check b/usr/src/cmd/Makefile.check index 96fea6b370ed..8c141358e769 100644 --- a/usr/src/cmd/Makefile.check +++ b/usr/src/cmd/Makefile.check @@ -19,8 +19,11 @@ # CDDL HEADER END # +# # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2014 Garrett D'Amore +# Copyright 2016 Nexenta Systems, Inc. +# include ../Makefile.master @@ -118,12 +121,6 @@ MANIFEST_SUBDIRS= \ krb5/kwarn \ krb5/slave \ lp/cmd/lpsched \ - lvm/rpc.mdcommd \ - lvm/rpc.metad \ - lvm/rpc.metamedd \ - lvm/rpc.metamhd \ - lvm/md_monitord \ - lvm/util \ picl/picld \ pools/poold \ print/bsd-sysv-commands \ diff --git a/usr/src/cmd/boot/bootadm/Makefile b/usr/src/cmd/boot/bootadm/Makefile index 40ad948ba275..d721aac3c728 100644 --- a/usr/src/cmd/boot/bootadm/Makefile +++ b/usr/src/cmd/boot/bootadm/Makefile @@ -21,9 +21,9 @@ # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# -# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# # Copyright 2016 Toomas Soome +# Copyright 2016 Nexenta Systems, Inc. # PROG= bootadm @@ -43,7 +43,7 @@ POFILE= bootadm_cmd.po LDLIBS_i386= -lfdisk LDLIBS += -lficl-sys -lpkcs11 -lcryptoutil -lnvpair -lgen -ladm -lefi -LDLIBS += -lscf -lz -lbe -lzfs $(LDLIBS_$(MACH)) +LDLIBS += -lz -lbe -lzfs $(LDLIBS_$(MACH)) # Writing into string literals is incorrect. We need to match gcc's # behavior, which causes us to take SIGSEGV on such a write. diff --git a/usr/src/cmd/boot/bootadm/bootadm.c b/usr/src/cmd/boot/bootadm/bootadm.c index 58c1a825eca1..2b62f4910033 100644 --- a/usr/src/cmd/boot/bootadm/bootadm.c +++ b/usr/src/cmd/boot/bootadm/bootadm.c @@ -18,15 +18,13 @@ * * CDDL HEADER END */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - */ /* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Milan Jurik. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Delphix. All rights reserved. * Copyright 2016 Toomas Soome + * Copyright 2016 Nexenta Systems, Inc. */ /* @@ -272,7 +270,6 @@ static char *expand_path(const char *); static long s_strtol(char *); static int s_fputs(char *, FILE *); -static int is_ufs(char *root); static int is_amd64(void); static char *get_machine(void); static void append_to_flist(filelist_t *, char *); @@ -4001,30 +3998,6 @@ is_zfs(char *root) } } -static int -is_ufs(char *root) -{ - struct statvfs vfs; - int ret; - const char *fcn = "is_ufs()"; - - ret = statvfs(root, &vfs); - INJECT_ERROR1("STATVFS_UFS", ret = 1); - if (ret != 0) { - bam_error(_("statvfs failed for %s: %s\n"), root, - strerror(errno)); - return (0); - } - - if (strncmp(vfs.f_basetype, "ufs", strlen("ufs")) == 0) { - BAM_DPRINTF(("%s: is a UFS filesystem: %s\n", fcn, root)); - return (1); - } else { - BAM_DPRINTF(("%s: is *NOT* a UFS filesystem: %s\n", fcn, root)); - return (0); - } -} - int is_pcfs(char *root) { @@ -7968,236 +7941,6 @@ zfs_get_physical(char *special, char ***physarray, int *n) return (0); } -/* - * Certain services needed to run metastat successfully may not - * be enabled. Enable them now. - */ -/* - * Checks if the specified service is online - * Returns: 1 if the service is online - * 0 if the service is not online - * -1 on error - */ -static int -is_svc_online(char *svc) -{ - char *state; - const char *fcn = "is_svc_online()"; - - BAM_DPRINTF(("%s: entered. arg: %s\n", fcn, svc)); - - state = smf_get_state(svc); - INJECT_ERROR2("GET_SVC_STATE", free(state), state = NULL); - if (state == NULL) { - bam_error(_("failed to determine state of service: %s\n"), svc); - return (-1); - } - BAM_DPRINTF(("%s: got status for service: %s\n", fcn, svc)); - - if (strcmp(state, SCF_STATE_STRING_ONLINE) == 0) { - BAM_DPRINTF(("%s: service is online: %s\n", fcn, svc)); - free(state); - return (1); - } - - BAM_DPRINTF(("%s: service is *NOT* online(%s): %s\n", fcn, state, svc)); - - free(state); - - return (0); -} - -static int -enable_svc(char *svc) -{ - int ret; - int sleeptime; - const char *fcn = "enable_svc()"; - - ret = is_svc_online(svc); - if (ret == -1) { - bam_error(_("failed to determine if service is online: %s\n"), - svc); - return (-1); - } else if (ret == 1) { - BAM_DPRINTF(("%s: service is already online: %s\n", fcn, svc)); - return (0); - } - - /* Service is not enabled. Enable it now. */ - ret = smf_enable_instance(svc, 0); - INJECT_ERROR1("ENABLE_SVC_FAILED", ret = -1); - if (ret != 0) { - bam_error(_("failed to online service: %s\n"), svc); - return (-1); - } - - BAM_DPRINTF(("%s: initiated online of service: %s\n", fcn, svc)); - - sleeptime = 0; - do { - ret = is_svc_online(svc); - INJECT_ERROR1("SVC_ONLINE_SUCCESS", ret = 1); - INJECT_ERROR1("SVC_ONLINE_FAILURE", ret = -1); - INJECT_ERROR1("SVC_ONLINE_NOTYET", ret = 0); - if (ret == -1) { - bam_error(_("failed to get online status for " - "service: %s\n"), svc); - return (-1); - } else if (ret == 1) { - BAM_DPRINTF(("%s: service is NOW online: %s\n", - fcn, svc)); - return (1); - } - (void) sleep(1); - } while (++sleeptime < 60); - - bam_error(_("timed out waiting for service to online: %s\n"), svc); - - return (-1); -} - -static int -ufs_get_physical(char *special, char ***physarray, int *n) -{ - char cmd[PATH_MAX]; - char *shortname; - filelist_t flist = {0}; - char *meta; - char *type; - char *comp1; - char *comp2; - char *comp3; - char *comp4; - int i; - line_t *lp; - int ret; - char *svc; - const char *fcn = "ufs_get_physical()"; - - assert(special); - - BAM_DPRINTF(("%s: entered. arg: %s\n", fcn, special)); - - if (strncmp(special, "/dev/md/", strlen("/dev/md/")) != 0) { - bam_error(_("not a SVM metadevice: %s. Cannot derive physical " - "device\n"), special); - return (-1); - } - - if (strncmp(special, "/dev/md/dsk/", strlen("/dev/md/dsk/")) == 0) { - shortname = special + strlen("/dev/md/dsk/"); - } else if (strncmp(special, "/dev/md/rdsk/", - strlen("/dev/md/rdsk/")) == 0) { - shortname = special + strlen("/dev/md/rdsk"); - } else { - bam_error(_("invalid SVM metadevice name: %s. Cannot derive " - "physical device\n"), special); - return (-1); - } - - BAM_DPRINTF(("%s: short SVM name for special=%s is %s\n", - fcn, special, shortname)); - - svc = "network/rpc/meta:default"; - if (enable_svc(svc) == -1) { - bam_error(_("failed to start service %s for metastat " - "command\n"), svc); - } - - (void) snprintf(cmd, sizeof (cmd), "/sbin/metastat -p %s", shortname); - - ret = exec_cmd(cmd, &flist); - INJECT_ERROR1("UFS_SVM_METASTAT", ret = 1); - if (ret != 0) { - bam_error(_("metastat command failed on SVM metadevice: %s\n"), - shortname); - return (-1); - } - - INJECT_ERROR1("UFS_SVM_METASTAT_OUT", flist.head = NULL); - if (flist.head == NULL) { - bam_error(_("bad output from metastat command on SVM " - "metadevice: %s\n"), shortname); - filelist_free(&flist); - return (-1); - } - - /* - * Check if not a mirror. We only parse a single metadevice - * if not a mirror - */ - meta = strtok(flist.head->line, " \t"); - type = strtok(NULL, " \t"); - if (meta == NULL || type == NULL) { - bam_error(_("error parsing metastat output for SVM " - "metadevice: %s\n"), shortname); - filelist_free(&flist); - return (-1); - } - if (strcmp(type, "-m") != 0) { - comp1 = strtok(NULL, " \t"); - comp2 = strtok(NULL, " \t"); - if (comp1 == NULL || comp2 != NULL) { - bam_error(_("invalid fields in metastat output for " - "SVM metadevice: %s\n"), shortname); - filelist_free(&flist); - return (-1); - } - BAM_DPRINTF(("%s: single component %s for metadevice %s\n", - fcn, comp1, shortname)); - *physarray = s_calloc(1, sizeof (char *)); - (*physarray)[0] = s_strdup(comp1); - *n = 1; - filelist_free(&flist); - return (0); - } - - /* - * Okay we have a mirror. Everything after the first line - * is a submirror - */ - for (i = 0, lp = flist.head->next; lp; lp = lp->next) { - if (strstr(lp->line, "/dev/dsk/") == NULL && - strstr(lp->line, "/dev/rdsk/") == NULL) { - bam_error(_("cannot parse output of metastat command " - "for metadevice: %s\n"), shortname); - filelist_free(&flist); - return (-1); - } - i++; - } - - *physarray = s_calloc(i, sizeof (char *)); - *n = i; - - for (i = 0, lp = flist.head->next; lp; lp = lp->next) { - comp1 = strtok(lp->line, " \t"); - comp2 = strtok(NULL, " \t"); - comp3 = strtok(NULL, " \t"); - comp4 = strtok(NULL, " \t"); - - if (comp3 == NULL || comp4 == NULL || - (strncmp(comp4, "/dev/dsk/", strlen("/dev/dsk/")) != 0 && - strncmp(comp4, "/dev/rdsk/", strlen("/dev/rdsk/")) != 0)) { - bam_error(_("cannot parse submirror line in metastat " - "output for metadevice: %s\n"), shortname); - filelist_free(&flist); - free_physarray(*physarray, *n); - return (-1); - } - - (*physarray)[i++] = s_strdup(comp4); - } - - assert(i == *n); - - filelist_free(&flist); - - BAM_DPRINTF(("%s: returning SUCCESS\n", fcn)); - return (0); -} - static int get_physical(char *menu_root, char ***physarray, int *n) { @@ -8237,8 +7980,6 @@ get_physical(char *menu_root, char ***physarray, int *n) if (is_zfs(menu_root)) { ret = zfs_get_physical(special, physarray, n); - } else if (is_ufs(menu_root)) { - ret = ufs_get_physical(special, physarray, n); } else { bam_error(_("cannot derive physical device for %s (%s), " "unsupported filesystem\n"), menu_root, special); diff --git a/usr/src/cmd/boot/scripts/update_grub.ksh b/usr/src/cmd/boot/scripts/update_grub.ksh index 7799e7053f0d..8148717e1c4f 100644 --- a/usr/src/cmd/boot/scripts/update_grub.ksh +++ b/usr/src/cmd/boot/scripts/update_grub.ksh @@ -23,6 +23,7 @@ # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. +# Copyright 2016 Nexenta Systems, Inc. # PATH="/usr/bin:/usr/sbin:${PATH}"; export PATH @@ -70,7 +71,7 @@ check_zfs_boot() } # -# Detect SVM root and return the list of raw devices under the mirror +# Return the list of raw devices # get_rootdev_list() { @@ -90,14 +91,10 @@ get_rootdev_list() egrep -v "mirror|spare|replacing" | sed -n -e '/--/q' -e p | awk '{print $1}'` else - metadev=`grep -v "^#" "$ALT_ROOT"/etc/vfstab | \ - grep "[ ]/[ ]" | nawk '{print $2}'` - if [[ $metadev = /dev/rdsk/* ]]; then - rootdevlist=`basename "$metadev"` - elif [[ $metadev = /dev/md/rdsk/* ]]; then - metavol=`basename "$metadev"` - rootdevlist=`metastat -p $metavol |\ - grep -v "^$metavol[ ]" | nawk '{print $4}'` + dev=`grep -v "^#" "$ALT_ROOT"/etc/vfstab | \ + grep "[ ]/[ ]" | nawk '{print $2}'` + if [[ $dev = /dev/rdsk/* ]]; then + rootdevlist=`basename "$dev"` fi fi for rootdev in $rootdevlist diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com index 4df3b005855d..1585db289423 100644 --- a/usr/src/cmd/devfsadm/Makefile.com +++ b/usr/src/cmd/devfsadm/Makefile.com @@ -67,7 +67,6 @@ LINK_OBJS_CMN = \ fssnap_link.o \ sgen_link.o \ smp_link.o \ - md_link.o \ dtrace_link.o \ vscan_link.o \ zfs_link.o \ diff --git a/usr/src/cmd/devfsadm/md_link.c b/usr/src/cmd/devfsadm/md_link.c deleted file mode 100644 index f187a2f2a027..000000000000 --- a/usr/src/cmd/devfsadm/md_link.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MD_LINK_RE_DEVICES "^md/r?dsk/.+$" -#define MD_LINK_RE_SHARED "^md/shared/[0-9]+/r?dsk/.+$" -#define MD_LINK_RE_ADMIN "^md/admin" - -/* - * The devfsadm link module require the next section to - * be defined in order to know what and when to call functions - * in the module on device creation and removal. - */ - -/* setup for device creation */ - -static int md_create(di_minor_t minor, di_node_t node); - -static devfsadm_create_t md_cbt[] = { - { "pseudo", "ddi_pseudo", "md", - TYPE_EXACT | DRV_EXACT, ILEVEL_0, md_create, - }, -}; - -DEVFSADM_CREATE_INIT_V0(md_cbt); - -/* - * remove devices - always allow disks to be dynamically removed. Only allow - * admin device to be removed at reboot. - */ - -static devfsadm_remove_t md_remove_cbt[] = { - {"pseudo", MD_LINK_RE_DEVICES, RM_ALWAYS | RM_PRE | RM_HOT, - ILEVEL_0, devfsadm_rm_all}, - {"pseudo", MD_LINK_RE_SHARED, RM_ALWAYS | RM_PRE | RM_HOT, - ILEVEL_0, devfsadm_rm_all}, - {"pseudo", MD_LINK_RE_ADMIN, RM_ALWAYS | RM_PRE, - ILEVEL_0, devfsadm_rm_all}, -}; - -DEVFSADM_REMOVE_INIT_V0(md_remove_cbt); - - -/* - * minor_fini - module cleanup routine - */ -int -minor_fini(void) -{ - metarpccloseall(); - return (DEVFSADM_SUCCESS); -} - -/* - * For the admin device: - * /dev/md/admin -> /devices/pseudo/md@0:admin - * - * For metadevice: - * /dev/md/dsk/foobar --> /devices/pseudo/md@0:0,100,blk - * /dev/md/rdsk/foobar --> /devices/pseudo/md@0:0,100,raw - * - * Where 'foobar' is user specified arbitrary name and '100' - * is the minor number returned by MD_IOCMAKE_DEV ioctl - * - */ -static int -md_create(di_minor_t minor, di_node_t node) -{ - char mn[MAXNAMELEN + 1]; - char path[PATH_MAX + 1]; - char set_path[PATH_MAX +1]; - char sym_path[PATH_MAX + 1]; - int set = -1, ret; - char *type, *dir; - char *device_name; - dev_t minor_devt = di_minor_devt(minor); - int key; - mdsetname_t *sp = NULL; - md_error_t ep; - - /* - * Initialize sdssc entry points. Don't worry about the return - * value here since the interface functions will get initialized - * correctly regardless. - */ - (void) sdssc_bind_library(); - - (void) strcpy(mn, di_minor_name(minor)); - - /* - * Check whether we are being requested to setup the admin - * device link or one of the metadevice links. They need - * to be treated differently. - */ - - if (strcmp(mn, "admin") == 0) { - /* there is only one admin link and always in /dev/md/admin */ - (void) devfsadm_mklink("md/admin", node, minor, 0); - } else { - /* - * Extract out the minor components and create the - * appropriate links. The node looks like: - * md@,, - * where the number is the named diskset, - * is the metadevice number, and - * is the trailing "blk" or "raw" indication. - * - * NOTE: when is non-zero, we need to create - * under the "shared" directory entry instead of linking - * into the top level dsk/rdsk directories. - */ - ret = sscanf(mn, "%d,", &set); - if (ret == 1 && (type = strrchr(mn, ',')) != NULL) { - type++; - if (strcmp(type, "blk") == 0) { - dir = "dsk"; - } else { - dir = "rdsk"; - } - - (void) memset(&ep, '\0', sizeof (ep)); - if ((device_name = meta_getnmentbydev(set, - MD_SIDEWILD, minor_devt, NULL, NULL, - &key, &ep)) == NULL) { - (void) close_admin(&ep); - return (DEVFSADM_CONTINUE); - } - - if (set == 0) { - /* this is a simple md */ - (void) snprintf(path, sizeof (path), - "md/%s/%s", dir, basename(device_name)); - } else { - /* this is a shared md */ - (void) snprintf(path, sizeof (path), - "md/shared/%d/%s/%s", set, dir, - basename(device_name)); - - /* - * flush the caches so the next call to - * metasetnosetname will get us the - * updated cache. - */ - metaflushnames(0); - if ((sp = metasetnosetname(set, &ep)) - != NULL) { - (void) snprintf(set_path, - sizeof (set_path), "md/shared/%d", - sp->setno); - (void) snprintf(sym_path, - sizeof (sym_path), "md/%s", - sp->setname); - } - } - (void) devfsadm_mklink(path, node, minor, 0); - Free(device_name); - - if (sp != NULL) { - (void) devfsadm_secondary_link(sym_path, - set_path, 0); - } - } - } - - (void) close_admin(&ep); - return (DEVFSADM_CONTINUE); -} diff --git a/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c b/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c index 7d77e7a8b92b..a3464f9b5c95 100644 --- a/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c +++ b/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c @@ -544,12 +544,9 @@ int label_type; /* * logging support */ -int ismdd; /* true if device is a SVM device */ -int islog; /* true if ufs or SVM logging is enabled */ -int islogok; /* true if ufs/SVM log state is good */ - -static int isufslog; /* true if ufs logging is enabled */ -static int waslog; /* true when ufs logging disabled during grow */ +int islog; /* true if ufs logging is enabled */ +int islogok; /* true if ufs log state is good */ +int waslog; /* true when ufs logging disabled during grow */ /* * growfs defines, globals, and forward references @@ -631,7 +628,6 @@ main(int argc, char *argv[]) char *special; struct statvfs64 fs; struct dk_geom dkg; - struct dk_cinfo dkcinfo; struct dk_minfo dkminfo; char pbuf[sizeof (uint64_t) * 3 + 1]; char *tmpbuf; @@ -1373,27 +1369,10 @@ main(int argc, char *argv[]) /* * get the controller info */ - ismdd = 0; islog = 0; islogok = 0; waslog = 0; - if (ioctl(fsi, DKIOCINFO, &dkcinfo) == 0) - /* - * if it is an MDD (disksuite) device - */ - if (dkcinfo.dki_ctype == DKC_MD) { - ismdd++; - /* - * check the logging device - */ - if (ioctl(fsi, _FIOISLOG, NULL) == 0) { - islog++; - if (ioctl(fsi, _FIOISLOGOK, NULL) == 0) - islogok++; - } - } - /* * Do not grow the file system, but print on stdout the maximum * size in sectors to which the file system can be increased. @@ -3997,7 +3976,7 @@ checksblock(struct fs sb, int proceed) /* * Roll the embedded log, if any, and set up the global variables - * islog, islogok and isufslog. + * islog and islogok. */ static void logsetup(char *devstr) @@ -4014,17 +3993,14 @@ logsetup(char *devstr) /* * No log present, nothing to do. */ - islogok = 0; islog = 0; - isufslog = 0; + islogok = 0; return; } else { /* * There's a log in a yet unknown state, attempt to roll it. */ - islog = 1; islogok = 0; - isufslog = 0; /* * We failed to roll the log, bail out. @@ -4032,7 +4008,7 @@ logsetup(char *devstr) if (rl_roll_log(devstr) != RL_SUCCESS) return; - isufslog = 1; + islog = 1; /* log is not okay; check the fs */ if ((FSOKAY != (sblock.fs_state + sblock.fs_time)) || @@ -4140,7 +4116,7 @@ growinit(char *devstr) /* * disable ufs logging for growing */ - if (isufslog) { + if (islog) { if (rl_log_control(devstr, _FIOLOGDISABLE) != RL_SUCCESS) { (void) fprintf(stderr, gettext( "failed to disable logging\n")); diff --git a/usr/src/cmd/iscsid/iscsid.c b/usr/src/cmd/iscsid/iscsid.c index b8ffaab4b59e..463e3cae9671 100644 --- a/usr/src/cmd/iscsid/iscsid.c +++ b/usr/src/cmd/iscsid/iscsid.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/usr/src/cmd/lvm/Makefile b/usr/src/cmd/lvm/Makefile deleted file mode 100644 index f655679dfd48..000000000000 --- a/usr/src/cmd/lvm/Makefile +++ /dev/null @@ -1,72 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile definitions for volume management -# -# -# cmd/lvm/Makefile -# - -include ../Makefile.cmd - -SUBDIRS = md_monitord util rpc.metad rpc.metamhd rpc.metamedd etc -SUBDIRS += rpc.mdcommd metassist -POSUBDIRS = md_monitord util rpc.metad rpc.mdcommd metassist - -CAT= cat - -# -# for messaging catalog -# -POFILE= SUNW_MD.po -POFILES= $(POSUBDIRS:%=%/%p.po) - -all := TARGET= all -install := TARGET= install -clean := TARGET= clean -clobber := TARGET= clobber -lint := TARGET= lint -cstyle := TARGET= cstyle -_msg := TARGET= catalog - -.KEEP_STATE: - -all install clean cstyle lint: $(SUBDIRS) - -clobber: $(SUBDIRS) - $(RM) $(CLOBBERFILES) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -$(POFILE): $(POFILES) - $(BUILDPO.pofiles) - -_msg: $(POSUBDIRS) .WAIT $(MSGDOMAINPOFILE) - -FRC: - -include $(SRC)/Makefile.msg.targ diff --git a/usr/src/cmd/lvm/Makefile.lvm b/usr/src/cmd/lvm/Makefile.lvm deleted file mode 100644 index 8b93647d9006..000000000000 --- a/usr/src/cmd/lvm/Makefile.lvm +++ /dev/null @@ -1,47 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2000-2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -# -# Libraries and utilities go here. -# -RPCGENFLAGS = -C -M $(DEFINES) -RPCGENFLAGS_SERVER = $(RPCGENFLAGS) -s circuit_n - -CPPFLAGS += -D_FILE_OFFSET_BITS=64 -CERRWARN += -_gcc=-Wno-unused-variable -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-switch -CERRWARN += -_gcc=-Wno-type-limits -CERRWARN += -_gcc=-Wno-unused-value - -# -# Lint flags we use for volume management. -# -LINTFLAGS += -un - -CSTYLE=cstyle diff --git a/usr/src/cmd/lvm/etc/Makefile b/usr/src/cmd/lvm/etc/Makefile deleted file mode 100644 index ec076d763577..000000000000 --- a/usr/src/cmd/lvm/etc/Makefile +++ /dev/null @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Makefile for logical volume management -# -# -# cmd/lvm/etc/Makefile - - -include ../../Makefile.cmd -include ../Makefile.lvm - -FLASHPRESRC= svm.save.sh -FLASHPOSTSRC= svm.cleanup.sh -FLASHPREPROG= $(FLASHPRESRC:%.sh=%) -FLASHPOSTPROG= $(FLASHPOSTSRC:%.sh=%) -LVMINITPROG= md.cf md.ctlrmap md.tab mddb.cf lock runtime.cf devpath -# -# - -all:= TARGET= all -install:= TARGET= install -clean:= TARGET= clean -clobber:= TARGET= clobber -lint:= TARGET= lint - -ETCLVMD= $(ROOTETC)/lvm -ETCFLASHD= $(ROOTETC)/flash -ETCFLASHPRED= $(ROOTETC)/flash/predeployment -ETCFLASHPOSTD= $(ROOTETC)/flash/postdeployment - -DIRS= $(ETCLVMD) $(ETCFLASHD) $(ETCFLASHPRED) $(ETCFLASHPOSTD) - -ETCLVMINITPROG= $(LVMINITPROG:%=$(ETCLVMD)/%) -ETCFLASHPREPROG= $(FLASHPREPROG:%=$(ETCFLASHPRED)/%) -ETCFLASHPOSTPROG= $(FLASHPOSTPROG:%=$(ETCFLASHPOSTD)/%) - -$(ETCLVMD) := DIRMODE= 755 -$(ETCFLASHD) := DIRMODE= 755 -$(ETCFLASHPRED) := DIRMODE= 755 -$(ETCFLASHPOSTD) := DIRMODE= 755 -$(ETCLVMINITPROG) := FILEMODE= 0644 -$(ETCFLASHPREPROG) := FILEMODE= 0744 -$(ETCFLASHPOSTPROG) := FILEMODE= 0744 - -.KEEP_STATE: - -all: $(LVMINITPROG) $(FLASHPREPROG) $(FLASHPOSTPROG) - -install: all .WAIT $(DIRS) .WAIT $(ETCLVMINITPROG) - -install: all .WAIT $(DIRS) .WAIT $(ETCLVMINITPROG) \ - $(ETCFLASHPREPROG) $(ETCFLASHPOSTPROG) - -cstyle: - -lint: - -$(DIRS): - $(INS.dir) - -$(ETCLVMD)/% : % - $(INS.file) - -$(ETCFLASHPRED)/% : % - $(INS.file) - -$(ETCFLASHPOSTD)/% : % - $(INS.file) - -clean: - -clobber: clean - $(RM) $(FLASHPREPROG) $(FLASHPOSTPROG) diff --git a/usr/src/cmd/lvm/etc/devpath b/usr/src/cmd/lvm/etc/devpath deleted file mode 100644 index 68a59ff4afaa..000000000000 --- a/usr/src/cmd/lvm/etc/devpath +++ /dev/null @@ -1,27 +0,0 @@ -# -#pragma ident "%Z%%M% %I% %E% SMI" -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -/dev/ap/rdsk -/dev/did/rdsk diff --git a/usr/src/cmd/lvm/etc/lock b/usr/src/cmd/lvm/etc/lock deleted file mode 100644 index f97565a63099..000000000000 --- a/usr/src/cmd/lvm/etc/lock +++ /dev/null @@ -1,26 +0,0 @@ -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -#metadevice lock file - do not delete diff --git a/usr/src/cmd/lvm/etc/md.cf b/usr/src/cmd/lvm/etc/md.cf deleted file mode 100644 index cd2add72cea5..000000000000 --- a/usr/src/cmd/lvm/etc/md.cf +++ /dev/null @@ -1,27 +0,0 @@ -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# metadevice database configuration file -# do not hand edit diff --git a/usr/src/cmd/lvm/etc/md.ctlrmap b/usr/src/cmd/lvm/etc/md.ctlrmap deleted file mode 100644 index d61c337a31a9..000000000000 --- a/usr/src/cmd/lvm/etc/md.ctlrmap +++ /dev/null @@ -1,32 +0,0 @@ -# -#pragma ident "%Z%%M% %I% %E% SMI" -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# each bus in the SSA100 -"/SUNW,pln@.*/ssd@0,[0-9]:[a-h](,raw){0,1}$" 0 0 "/SUNW,pln@" "%*04lx%04lx,%08lx/" -"/SUNW,pln@.*/ssd@1,[0-9]:[a-h](,raw){0,1}$" 0 1 "/SUNW,pln@" "%*04lx%04lx,%08lx/" -"/SUNW,pln@.*/ssd@2,[0-9]:[a-h](,raw){0,1}$" 1 2 "/SUNW,pln@" "%*04lx%04lx,%08lx/" -"/SUNW,pln@.*/ssd@3,[0-9]:[a-h](,raw){0,1}$" 1 3 "/SUNW,pln@" "%*04lx%04lx,%08lx/" -"/SUNW,pln@.*/ssd@4,[0-9]:[a-h](,raw){0,1}$" 2 4 "/SUNW,pln@" "%*04lx%04lx,%08lx/" -"/SUNW,pln@.*/ssd@5,[0-9]:[a-h](,raw){0,1}$" 2 5 "/SUNW,pln@" "%*04lx%04lx,%08lx/" diff --git a/usr/src/cmd/lvm/etc/md.tab b/usr/src/cmd/lvm/etc/md.tab deleted file mode 100644 index 4ea97cc0235e..000000000000 --- a/usr/src/cmd/lvm/etc/md.tab +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# ident "%Z%%M% %I% %E% SMI" -# -# md.tab -# -# metainit utility input file. -# -# The following examples show the format for local metadevices, and a -# similar example for a shared metadevice, where appropiate. The shared -# metadevices are in the diskset named "blue": -# -# Metadevice database entry: -# -# mddb01 /dev/dsk/c0t2d0s0 /dev/dsk/c0t0d0s0 -# -# Concatenation of devices: -# -# d10 2 1 /dev/dsk/c0t2d0s0 1 /dev/dsk/c0t0d0s0 -# blue/d10 2 1 /dev/dsk/c2t2d0s0 1 /dev/dsk/c2t0d0s0 -# -# Stripe of devices: -# -# d11 1 2 /dev/dsk/c0t2d0s1 /dev/dsk/c0t0d0s1 -# blue/d11 1 2 /dev/dsk/c2t2d0s1 /dev/dsk/c2t0d0s1 -# -# Concatenation of stripes (with a hot spare pool): -# -# d13 2 2 /dev/dsk/c0t2d0s0 /dev/dsk/c0t0d0s0 \ -# 2 /dev/dsk/c0t2d0s1 /dev/dsk/c0t0d0s1 -h hsp001 -# blue/d13 2 2 /dev/dsk/c2t2d0s0 /dev/dsk/c2t0d0s0 \ -# 2 /dev/dsk/c2t2d0s1 /dev/dsk/c2t0d0s1 -h blue/hsp001 -# -# Two way mirror: -# -# d14 -m d10 d11 -# blue/d14 -m blue/d10 blue/d11 -# -# RAID of devices -# -# d15 -r /dev/dsk/c1t0d0s0 /dev/dsk/c1t1d0s0 \ -# /dev/dsk/c1t2d0s0 /dev/dsk/c1t3d0s0 -# blue/d15 -r /dev/dsk/c2t0d0s0 /dev/dsk/c2t1d0s0 \ -# /dev/dsk/c2t2d0s0 /dev/dsk/c2t3d0s0 -# -# Hot Spare Pool of devices -# -# hsp001 /dev/dsk/c1t0d0s0 -# blue/hsp001 /dev/dsk/c2t0d0s0 -# -# 100MB Soft Partition -# -# d1 -p /dev/dsk/c1t0d0s1 100M -# blue/d1 -p /dev/dsk/c2t0d0s1 100M diff --git a/usr/src/cmd/lvm/etc/mddb.cf b/usr/src/cmd/lvm/etc/mddb.cf deleted file mode 100644 index d03e0004c416..000000000000 --- a/usr/src/cmd/lvm/etc/mddb.cf +++ /dev/null @@ -1,27 +0,0 @@ -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -#metadevice database location file do not hand edit -#driver minor_t daddr_t checksum diff --git a/usr/src/cmd/lvm/etc/runtime.cf b/usr/src/cmd/lvm/etc/runtime.cf deleted file mode 100644 index a517b3a8292c..000000000000 --- a/usr/src/cmd/lvm/etc/runtime.cf +++ /dev/null @@ -1,34 +0,0 @@ -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# runtime parameter configuration file -# -# CAUTION: This file implements a contract private interface. -# Do *NOT* edit it unless using that interface. -# Errors in the content of this file will probably -# cause data loss on systems running DiskSuite. -# -ownerioctls=on diff --git a/usr/src/cmd/lvm/etc/svm.cleanup.sh b/usr/src/cmd/lvm/etc/svm.cleanup.sh deleted file mode 100755 index d8cbab907038..000000000000 --- a/usr/src/cmd/lvm/etc/svm.cleanup.sh +++ /dev/null @@ -1,212 +0,0 @@ -#! /usr/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# SVM Flash cleanup -# Remove existing master SVM configuration on clone after full flash install. -# Restore existing clone SVM configuation after differential flash install. -# - -IN_SYS=${FLASH_ROOT}/etc/system -IN_CONF=${FLASH_ROOT}/kernel/drv/md.conf -IN_CF=${FLASH_ROOT}/etc/lvm/mddb.cf - -TMP_SYS=/var/tmp/system.1 -TMP_CONF=/var/tmp/md.conf.1 -TMP_CF=/var/tmp/mddb.cf.1 - -# Directory where original clone config is saved. -SAVE_DIR=${FLASH_DIR}/flash/svm - -if [ "${FLASH_TYPE}" = "FULL" ]; then - # Full flash install, remove master's SVM configuration from clone. - - # Remove any SVM root entry from /etc/system file. - nawk ' - BEGIN {delroot=0} - /^\* Begin MDD root info \(do not edit\)$/ {delroot=1} - /^\* End MDD root info \(do not edit\)$/ {delroot=0; next} - {if (delroot == 0) print $0} - ' ${IN_SYS} > ${TMP_SYS} - cp ${TMP_SYS} ${IN_SYS} - - # Check if we are on the mini-root. If we are, we need to clean up the - # mddb configuration since this implies we are doing a full flash onto - # a fresh system. - # - # If we are not on the mini-root that must mean we are installing - # the full flash via live-upgrade. In that case we share the - # SVM configuration with the currently running system so we - # need to copy the md.conf file from the current root onto the - # newly installed root. Note that the flash archive might not have - # been created from the currently running system. - if [ -h /kernel/drv/md.conf ]; then - # Remove SVM mddb entries from /kernel/drv/md.conf. - nawk ' - BEGIN {delmddb=0} - /^# Begin MDD database info \(do not edit\)$/ {delmddb=1} - /^# End MDD database info \(do not edit\)$/ {delmddb=0; next} - {if (delmddb == 0) print $0} - ' ${IN_CONF} > ${TMP_CONF} - cp ${TMP_CONF} ${IN_CONF} - - # Remove SVM mddb entries from /etc/lvm/mddb.cf. - nawk ' - /^#/ {print $0} - ' ${IN_CF} > ${TMP_CF} - cp ${TMP_CF} ${IN_CF} - - else - # copy SVM config from current root to new root - cp /kernel/drv/md.conf ${IN_CONF} - cp /etc/lvm/mddb.cf ${IN_CF} - fi - - # We may need to enable the SVM services in SMF. This could happen - # if we used jumpstart or live-upgrade to create SVM volumes as - # part of the flash install. - # - # It doesn't matter if we are doing a flash install via a jumpstart - # on the mini-root or via a live-upgrade. In both cases we check - # the md.conf on the currently running root to see if SVM is - # configured. For the jumpstart case it will have setup the - # volumes already so the mini-root md.conf has the mddb info. For - # the live-upgade case both roots will be sharing the same md.conf - # and have the same view of the SVM configuration. - # - # Check if there are mddb entries in md.conf to determine if SVM is - # configured. - sed -e 's/#.*$//' /kernel/drv/md.conf | \ - egrep '^[ ]*mddb_bootlist' >/dev/null 2>&1 - MDDB_STATUS=$? - - if [ $MDDB_STATUS -eq 0 ]; then - echo "/usr/sbin/svcadm enable system/metainit:default" >> \ - ${FLASH_ROOT}/var/svc/profile/upgrade - - echo "/usr/sbin/svcadm enable system/mdmonitor:default" >> \ - ${FLASH_ROOT}/var/svc/profile/upgrade - - echo "/usr/sbin/svcadm enable network/rpc/meta:default" >> \ - ${FLASH_ROOT}/var/svc/profile/upgrade - fi - -else - # Differential flash install, restore clone SVM configuration. - # The matrix of master/clone SVM config w/ diff. flash looks like: - # - # master clone clone after differential flash - # - # yes yes same as clone prior to diff. flash - # yes no no - # no yes same as clone prior to diff. flash - # no no no - # - - # restore saved config files - cp ${SAVE_DIR}/md.conf ${FLASH_ROOT}/kernel/drv/md.conf - cp ${SAVE_DIR}/devpath ${FLASH_ROOT}/etc/lvm/devpath - cp ${SAVE_DIR}/md.cf ${FLASH_ROOT}/etc/lvm/md.cf - cp ${SAVE_DIR}/md.ctlrmap ${FLASH_ROOT}/etc/lvm/md.ctlrmap - cp ${SAVE_DIR}/md.tab ${FLASH_ROOT}/etc/lvm/md.tab - cp ${SAVE_DIR}/mddb.cf ${FLASH_ROOT}/etc/lvm/mddb.cf - cp ${SAVE_DIR}/runtime.cf ${FLASH_ROOT}/etc/lvm/runtime.cf - - # Now process the various permutations for the master and clone - # /etc/system file SVM root entries. - - # First check if we need to do anything with /etc/system. - if `cmp -s ${SAVE_DIR}/system ${IN_SYS} >/dev/null 2>&1`; then - # There is no difference so leave it alone. - exit 0; - fi - - # Get any SVM root entry from master /etc/system file. - MASTER_ROOT=`nawk ' - BEGIN {inroot=0} - /^\* Begin MDD root info \(do not edit\)$/ {inroot=1; next} - /^\* End MDD root info \(do not edit\)$/ {inroot=0} - {if (inroot == 1) print $0} - ' ${IN_SYS}` - - # Get any SVM root entry from clone /etc/system file. - CLONE_ROOT=`nawk ' - BEGIN {inroot=0} - /^\* Begin MDD root info \(do not edit\)$/ {inroot=1; next} - /^\* End MDD root info \(do not edit\)$/ {inroot=0} - {if (inroot == 1) print $0} - ' ${SAVE_DIR}/system` - - # If there is an SVM root entry in the master /etc/system file. - if [ "${MASTER_ROOT}" ]; then - - # If there is an SVM root entry in the clone /etc/system file. - if [ "${CLONE_ROOT}" ]; then - - # Restore clone SVM root entry in /etc/system file. - nawk -v clone_root="${CLONE_ROOT}" ' - BEGIN {newroot=0} - /^\* Begin MDD root info \(do not edit\)$/ { - newroot=1 - print $0 - print clone_root - } - /^\* End MDD root info \(do not edit\)$/ {newroot=0} - {if (newroot == 0) print $0} - ' ${IN_SYS} >${TMP_SYS} - cp ${TMP_SYS} ${IN_SYS} - - else - - # There is no SVM root entry in the clone so remove the entry - # from the /etc/system file. - nawk ' - BEGIN {delroot=0} - /^\* Begin MDD root info \(do not edit\)$/ {delroot=1} - /^\* End MDD root info \(do not edit\)$/ {delroot=0; next } - {if (delroot == 0) print $0} - ' ${IN_SYS} >${TMP_SYS} - cp ${TMP_SYS} ${IN_SYS} - - fi - - else - # Master has no SVM root entry in the /etc/system file. - if [ "${CLONE_ROOT}" ]; then - # But clone does have one so we need to add it back in. - - echo "* Begin MDD root info (do not edit)" >> ${IN_SYS} - echo "${CLONE_ROOT}" >> ${IN_SYS} - echo "* End MDD root info (do not edit)" >> ${IN_SYS} - fi - - # If neither master nor clone has SVM root entry then - # we just leave the system file alone. - fi -fi - -exit 0 diff --git a/usr/src/cmd/lvm/etc/svm.save.sh b/usr/src/cmd/lvm/etc/svm.save.sh deleted file mode 100755 index 84a9c396401d..000000000000 --- a/usr/src/cmd/lvm/etc/svm.save.sh +++ /dev/null @@ -1,46 +0,0 @@ -#! /usr/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Save existing clone SVM configuation before differential flash installation. - -# directory where we save original clone config -SAVE_DIR=${FLASH_DIR}/flash/svm - -if [ "${FLASH_TYPE}" = "DIFFERENTIAL" ]; then - mkdir -p ${SAVE_DIR} - cp ${FLASH_ROOT}/kernel/drv/md.conf ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/devpath ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/md.cf ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/md.ctlrmap ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/md.tab ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/mddb.cf ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/lvm/runtime.cf ${SAVE_DIR} - cp ${FLASH_ROOT}/etc/system ${SAVE_DIR} -fi - -exit 0 diff --git a/usr/src/cmd/lvm/md_monitord/Makefile b/usr/src/cmd/lvm/md_monitord/Makefile deleted file mode 100644 index 33f2dce12f00..000000000000 --- a/usr/src/cmd/lvm/md_monitord/Makefile +++ /dev/null @@ -1,100 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -# -# Makefile for logical volume management -# -# -# cmd/lvm/md_monitord/Makefile - -PROG= mdmonitord -MANIFEST= mdmonitor.xml -SVCMETHOD= svc-mdmonitor - -include ../../Makefile.cmd -include ../Makefile.lvm - -SUBDIRS= $(MACH) - -SRCS= probedev.c md_monitord.c - -CLOBBERFILES += $(SVCMETHOD) - -# -# i18n: For catalog we need to put messages from $(SRCS) and $(SVCMETHOD).sh -# into "`basename $PWD`p.po" = md_monitordp.po = $(POFILE) (that's what -# ../Makefile expects). We'll use $(BUILDPO.pofiles) to build $(POFILE) from -# $(MDPO) and $(SVCMETHODPO), and we'll use $(BUILDPO.msgfiles) to build -# $(MDPO) from $(SRCS). The catch is that $(BUILDPO.msgfiles) is defined to -# produce $(POFILE). Fortunately we can conditionally define POFILE = MDPO -# when building MDPO. make complains when this conditional definition is -# before we include Makefile.msg.targ, so it must lie at the end. -# -MSGFILES= $(SRCS:%.c=%.i) -MDPO= $(PROG:%=%.po) -SVCMETHODPO= $(SVCMETHOD:%=%.po) -POFILES= $(MDPO) $(SVCMETHODPO) -POFILE= md_monitordp.po - -ROOTMANIFESTDIR= $(ROOTSVCSYSTEM) - -$(ROOTSVCMETHOD): $(SVCMETHOD) - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all clobber lint: $(SUBDIRS) - -$(MDPO): $(MSGFILES) - $(BUILDPO.msgfiles) - -$(POFILE): $(POFILES) - $(BUILDPO.pofiles) - -catalog: $(POFILE) - -check: $(CHKMANIFEST) - -clean: $(SUBDIRS) - $(RM) $(MSGFILES) - -install: $(SUBDIRS) $(ROOTMANIFEST) $(ROOTSVCMETHOD) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ -include $(SRC)/Makefile.msg.targ - -$(MDPO) := POFILE = $(MDPO) diff --git a/usr/src/cmd/lvm/md_monitord/i386/Makefile b/usr/src/cmd/lvm/md_monitord/i386/Makefile deleted file mode 100644 index e11cd9adea20..000000000000 --- a/usr/src/cmd/lvm/md_monitord/i386/Makefile +++ /dev/null @@ -1,71 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996, 2000-2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile for logical volume management -# - -PROG= mdmonitord - -OBJS= probedev.o \ - md_monitord.o -SRCS= $(OBJS:%.o=../%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta - -DEFINES += -DDEBUG -CFLAGS += ${DEFINES} - -# -# -lint := LINTFLAGS += -m - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJS) - $(LINK.c) -o $@ $(OBJS) $(LDLIBS) - $(POST_PROCESS) - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${SRCS} - -clean: - ${RM} ${OBJS} *.o - -include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/cmd/lvm/md_monitord/md_monitord.c b/usr/src/cmd/lvm/md_monitord/md_monitord.c deleted file mode 100644 index e24770a9eead..000000000000 --- a/usr/src/cmd/lvm/md_monitord/md_monitord.c +++ /dev/null @@ -1,526 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * probedev issues ioctls for all the metadevices - */ - -#include "md_monitord.h" -#include - -extern char queue_name[]; -boolean_e issue_ioctl = True; - - -#define DEBUG_LEVEL_FORK 9 /* will run in background at all */ - /* levels less than DEBUG_LEVEL_FORK */ - -/* function prototypes */ -static void usage(void); -static void catch_sig(int); -static pid_t enter_daemon_lock(void); -static void exit_daemon_lock(void); -static void probe_all_devs(boolean_e, md_error_t *, boolean_e); - -#define DAEMON_LOCK_FILE "/etc/lvm/.mdmonitord.lock" - -/* - * Global variable - */ -mdsetname_t *sp; - -static int hold_daemon_lock; -static const char *daemon_lock_file = DAEMON_LOCK_FILE; -static int daemon_lock_fd; - -static int debug_level; -static int logflag; -static char *prog; -static struct itimerval itimer; -static boolean_e probe_started; /* flag to indicate main is probing */ - -static void -usage() { - (void) fprintf(stderr, gettext( - "usage: mdmonitord [-d ] [-t poll time]\n" - "higher debug levels get progressively more" - "detailed debug information.\n\n" - "mdmonitord will run in background if run" - "with a debug_level less than %d.\n"), DEBUG_LEVEL_FORK); - exit(-1); -} - - -/* common exit function which ensures releasing locks */ -void -monitord_exit(int status) -{ - monitord_print(1, gettext("exit status = %d\n"), status); - - monitord_print(8, "hold_daemon_lock %d\n", hold_daemon_lock); - if (hold_daemon_lock) { - exit_daemon_lock(); - } - md_exit(sp, status); -} - - -/* - * When SIGHUP is received, reload modules? - */ -void -catch_sig(int sig) -{ - boolean_e startup = False; - md_error_t status = mdnullerror; - boolean_e sig_verbose = True; - - if (sig == SIGALRM) { - monitord_print(6, gettext("SIGALRM processing")); - if (probe_started == True) { - monitord_print(6, gettext( - " probe_started returning\n")); - return; - } - monitord_print(6, gettext( - " starting probe from signal handler\n")); - probe_all_devs(startup, &status, sig_verbose); - (void) setitimer(ITIMER_REAL, &itimer, NULL); - } - if (sig == SIGHUP) - monitord_exit(sig); -} - -/* - * Use an advisory lock to ensure that only one daemon process is - * active at any point in time. - */ -static pid_t -check_daemon_lock(void) -{ - struct flock lock; - - monitord_print(1, gettext("check_daemon_lock: lock file = %s\n"), - daemon_lock_file); - - daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644); - if (daemon_lock_fd < 0) { - monitord_print(0, "open(%s) - %s\n", daemon_lock_file, - strerror(errno)); - monitord_exit(-1); - } - - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - - if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) { - monitord_print(0, "lock(%s) - %s", daemon_lock_file, - strerror(errno)); - monitord_exit(-1); - } - - return (lock.l_type == F_UNLCK ? 0 : lock.l_pid); -} - -static pid_t -enter_daemon_lock(void) -{ - struct flock lock; - - monitord_print(1, gettext( - "enter_daemon_lock: lock file = %s\n"), daemon_lock_file); - - daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644); - if (daemon_lock_fd < 0) { - monitord_print(0, "open(%s) - %s\n", - daemon_lock_file, strerror(errno)); - monitord_exit(-1); - } - - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - - if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) { - - if (errno == EAGAIN || errno == EDEADLK) { - - if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) { - monitord_print(0, "lock(%s) - %s", - daemon_lock_file, strerror(errno)); - monitord_exit(-1); - } - - return (lock.l_pid); - } - } - hold_daemon_lock = 1; - - return (0); -} - -/* - * Drop the advisory daemon lock, close lock file - */ -static void -exit_daemon_lock(void) -{ - struct flock lock; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - - if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) { - monitord_print(0, "unlock(%s) - %s", - daemon_lock_file, strerror(errno)); - } - - if (close(daemon_lock_fd) == -1) { - monitord_print(0, "close(%s) failed - %s\n", - daemon_lock_file, strerror(errno)); - monitord_exit(-1); - } - (void) unlink(daemon_lock_file); -} - - -/* - * print error messages to the terminal or to syslog - */ -/*PRINTFLIKE2*/ -void -monitord_print(int level, char *message, ...) -{ - va_list ap; - static int newline = 1; - - if (level > debug_level) { - return; - } - - va_start(ap, message); - if (level == 0) { - if (logflag) { - (void) vsyslog(LOG_ERR, message, ap); - } else { - (void) vfprintf(stderr, message, ap); - } - - } else { - if (logflag) { - (void) syslog(LOG_DEBUG, "%s[%ld]: ", - prog, getpid()); - (void) vsyslog(LOG_DEBUG, message, ap); - } else { - if (newline) { - (void) fprintf(stdout, "%s[%ld]: ", - prog, getpid()); - (void) vfprintf(stdout, message, ap); - } else { - (void) vfprintf(stdout, message, ap); - } - } - } - if (message[strlen(message)-1] == '\n') { - newline = 1; - } else { - newline = 0; - } - va_end(ap); -} - - -char * -int2string(intmap_t *map, int value) -{ - const char *name = (const char *)NULL; - char charstr[100]; - - for (; map->im_name != (const char *)NULL; map++) { - if (map->im_int == value) { - name = map->im_name; - break; - } - } - if (name == (const char *)NULL) { - /* No match. Convert the string to an int. */ - (void) sprintf(charstr, "%d", value); - } else { - (void) snprintf(charstr, sizeof (charstr), "%d %s", - value, name); - } - return (strdup(charstr)); -} - -void -probe_all_devs(boolean_e startup, md_error_t *statusp, boolean_e verbose) -{ - set_t max_sets, set_idx; - - probe_started = True; - (void) set_snarf(statusp); - - if ((max_sets = get_max_sets(statusp)) == 0) { - mde_perror(statusp, gettext( - "Can't find max number of sets\n")); - monitord_exit(1); - } - - /* - * We delete the FF_Q to avoid recurse errors. Yes we will lose - * some but its the corner case. - */ - - if (startup == False && - (meta_notify_deleteq(MD_FF_Q, statusp) != 0)) { - mde_perror(statusp, gettext( - "delete queue failed\n")); - monitord_exit(1); - } - - for (set_idx = 0; set_idx < max_sets; set_idx++) { - if ((sp = metasetnosetname(set_idx, statusp)) == NULL) { - if (mdiserror(statusp, MDE_NO_SET) == 0) { - /* - * done break the loop - */ - break; - } else { - mdclrerror(statusp); - continue; - } - } - - /* if we dont have ownership or cannot lock it continue. */ - if ((meta_check_ownership(sp, statusp) == NULL) && - meta_lock(sp, TRUE, statusp)) - continue; - - /* Skip if a MN set */ - if (meta_is_mn_set(sp, statusp)) { - (void) meta_unlock(sp, statusp); - continue; - } - - probe_mirror_devs(verbose); - probe_raid_devs(verbose); - probe_trans_devs(verbose); - probe_hotspare_devs(verbose); - (void) meta_unlock(sp, statusp); - } - if (meta_notify_createq(MD_FF_Q, 0, statusp)) { - mde_perror(statusp, gettext( - "create queue failed")); - monitord_exit(1); - } - probe_started = False; - /* - * need to do it here only at startup. - * The daemon will restart the alarm. - */ - - if (startup == True) - (void) setitimer(ITIMER_REAL, &itimer, NULL); -} - -evid_t -wait_for_event(md_error_t *statusp) -{ - md_ev_t event; - - - event.setno = EV_ALLSETS; - event.obj = EV_ALLOBJS; - - do { - if (meta_notify_getev(MD_FF_Q, EVFLG_WAIT, &event, - statusp) < 0) { - monitord_print(8, - "meta_notify_getev: errno 0x%x\n", -errno); - monitord_exit(-errno); - } - } while ((event.ev != EV_IOERR && event.ev != EV_ERRED && - event.ev != EV_LASTERRED)); - return (event.ev); -} - -int -main(int argc, char **argv) -{ - boolean_e startup = True; - boolean_e verbose = False; - int i; - char c; - md_error_t status = mdnullerror; - struct sigaction act; - sigset_t mask; - unsigned long timerval = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if (sdssc_bind_library() == SDSSC_ERROR) { - (void) printf(gettext( - "%s: Interface error with libsds_sc.so\n"), argv[0]); - exit(1); - } - - if (md_init(argc, argv, 0, 1, &status) != 0 || - meta_check_root(&status) != 0) { - mde_perror(&status, ""); - monitord_exit(1); - } - - (void) sigfillset(&mask); - (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL); - - if (argc > 7) { - usage(); - } - - if ((prog = strrchr(argv[0], '/')) == NULL) { - prog = argv[0]; - } else { - prog++; - } - - /* - * Reset optind/opterr so that the command line arguments can be - * parsed. This is in case anything has already called getopt, - * for example sdssc_cmd_proxy which is not currently used but - * may be in the future. - */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "ivd:t:")) != EOF) { - switch (c) { - case 'v': - verbose = True; - break; - case 'i': - issue_ioctl = True; - break; - case 'd': - debug_level = atoi(optarg); - break; - case 't': - timerval = atol(optarg); - break; - default: - usage(); - exit(0); - } - } - - if (timerval == 0) { - monitord_print(8, gettext( - "operating in interrupt mode\n")); - } else { - itimer.it_value.tv_sec = timerval; - itimer.it_interval.tv_sec = timerval; - monitord_print(8, gettext( - "set value and interval %lu sec mode\n"), timerval); - } - /* - * set up our signal handler for SIGALRM. The - * rest are setup by md_init. - */ - - act.sa_handler = catch_sig; - (void) sigemptyset(&act.sa_mask); - act.sa_flags = SA_RESTART; - (void) sigaction(SIGALRM, &act, NULL); - (void) sigaction(SIGHUP, &act, NULL); - - (void) sigemptyset(&mask); - (void) sigaddset(&mask, SIGALRM); - (void) sigaddset(&mask, SIGHUP); - (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL); - - /* demonize ourselves */ - if (debug_level < DEBUG_LEVEL_FORK) { - pid_t pid; - - if ((pid = check_daemon_lock()) != 0) { - monitord_print(0, gettext( - "mdmonitord daemon pid %ld already running\n"), - pid); - exit(-1); - } - - if (fork()) { - exit(0); - } - - /* only one daemon can run at a time */ - if ((pid = enter_daemon_lock()) != 0) { - monitord_print(0, gettext( - "mdmonitord daemon pid %ld already running\n"), - pid); - exit(-1); - } - - (void) chdir("/"); - - (void) setsid(); - if (debug_level <= 1) { - for (i = 0; i < 3; i++) { - (void) close(i); - } - (void) open("/dev/null", 0); - (void) dup2(0, 1); - (void) dup2(0, 2); - logflag = 1; - } - } - - openlog("mdmonitord", LOG_PID, LOG_DAEMON); - - monitord_print(8, gettext( - "mdmonitord started, debug level = %d\n"), debug_level); - - - /* loop forever waiting for events */ - do { - metaflushnames(1); - probe_all_devs(startup, &status, verbose); - startup = False; /* since we have gone through once */ - } while (wait_for_event(&status)); - return (0); -} diff --git a/usr/src/cmd/lvm/md_monitord/md_monitord.h b/usr/src/cmd/lvm/md_monitord/md_monitord.h deleted file mode 100644 index 7c19e46aa109..000000000000 --- a/usr/src/cmd/lvm/md_monitord/md_monitord.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 1999-2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _MD_MONITORD_H -#define _MD_MONITORD_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#define _REENTRANT -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - /* #include */ -#include - /* #include */ -#include -#include -#include - -#define MD_FF_Q "md_failfast_queue_01" -#define MD_FF_TAG 1 -#define INTMAP(n) {n, #n} -#define CCNULL ((const char *)0) - -enum boolean { True, False }; -typedef enum boolean boolean_e; - -typedef struct intmap { - int im_int; - const char *im_name; -} intmap_t; - -extern boolean_e issue_ioctl; -extern mdsetname_t *sp; -void monitord_exit(int status); -void monitord_print(int level, char *message, ...); -void probe_mirror_devs(boolean_e verbose); -void probe_raid_devs(boolean_e verbose); -void probe_trans_devs(boolean_e verbose); -void probe_hotspare_devs(boolean_e verbose); -#ifdef __cplusplus -} -#endif - -#endif /* _MD_MONITORD_H */ diff --git a/usr/src/cmd/lvm/md_monitord/mdmonitor.xml b/usr/src/cmd/lvm/md_monitord/mdmonitor.xml deleted file mode 100644 index afa984d64232..000000000000 --- a/usr/src/cmd/lvm/md_monitord/mdmonitor.xml +++ /dev/null @@ -1,114 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/md_monitord/probedev.c b/usr/src/cmd/lvm/md_monitord/probedev.c deleted file mode 100644 index b2d94caedb4f..000000000000 --- a/usr/src/cmd/lvm/md_monitord/probedev.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#include "md_monitord.h" - -#define MD_PROBE_OPEN_T "probe open test" - -/* - * Failure return's a 1 - */ -int -hotspare_ok(char *bname) -{ - int fd; - char buf[512]; - - if ((fd = open(bname, O_RDONLY)) < 0) - return (0); - if (read(fd, buf, sizeof (buf)) < 0) { - (void) close(fd); - return (0); - } - (void) close(fd); - return (1); -} - -void -delete_hotspares_impl(mdhspname_t *hspnp, md_hsp_t *hspp, boolean_e verbose) -{ - md_hs_t *hsp; - uint_t hsi; - char *cname, *bname, *hs_state; - md_error_t e = mdnullerror; - int deleted_hs = 0; - - for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) { - mdnamelist_t *nlp; - - hsp = &hspp->hotspares.hotspares_val[hsi]; - if (verbose == True) - monitord_print(6, "hsi %d\n", hsi); - cname = hsp->hsnamep->cname; - bname = hsp->hsnamep->bname; - nlp = NULL; - (void) metanamelist_append(&nlp, hsp->hsnamep); - hs_state = hs_state_to_name(hsp, NULL); - /* print hotspare */ - if (verbose == True) - monitord_print(6, "\t%-19s\t%-19s\t%-12s\n", - cname, bname, hs_state); - if (hsp->state == HSS_AVAILABLE) { - if (hotspare_ok(bname)) - continue; - - monitord_print(6, gettext( - "NOTICE: Hotspare %s in %s has failed.\n" - "\tDeleting %s since it is not in use\n\n"), - bname, hspnp->hspname, bname); - - if (meta_hs_delete(sp, hspnp, nlp, 0, &e) != NULL) { - mde_perror(&e, ""); - mdclrerror(&e); - } else { - deleted_hs++; - } - } else { - if (verbose == True) - monitord_print(6, gettext( - "%s in use - skipping\n"), cname); - } - } -} - - - -/* - * Generic routine to issue probe ioctls - */ - -int -md_probe_ioctl(mdnamelist_t *nlp, int ndevs, char *drvname, boolean_e verbose) -{ - mdnamelist_t *p; - mdname_t *np; - md_probedev_t probe_ioc, *iocp; - int i, retval = 0; - /* - * Allocate space for all the metadevices and fill in - * the minor numbers. - */ - - (void) memset(&probe_ioc, 0, sizeof (probe_ioc)); - iocp = &probe_ioc; - - if ((iocp->mnum_list = (uintptr_t)calloc(ndevs, sizeof (minor_t))) - == 0) { - monitord_print(0, "md_probe_ioctl: calloc"); - return (-1); - } - - (void) strcpy(iocp->test_name, MD_PROBE_OPEN_T); - MD_SETDRIVERNAME(&probe_ioc, drvname, sp->setno); - - if (verbose == True) { - monitord_print(6, "\n\nmd_probe_ioctl: %s: %s\n", - (strcmp(sp->setname, MD_LOCAL_NAME) == 0) ? - gettext("local_set") : - sp->setname, iocp->md_driver.md_drivername); - } - - iocp->nmdevs = ndevs; - if (verbose == True) - monitord_print(6, "...ndevs 0x%x\n", ndevs); - - for (p = nlp, i = 0; p; p = p->next, i++) { - np = p->namep; - ((minor_t *)(uintptr_t)iocp->mnum_list)[i] = - meta_getminor(np->dev); - if (verbose == True) - monitord_print(6, "...%s 0x%lx\n", np->cname, - ((minor_t *)(uintptr_t)iocp->mnum_list)[i]); - } - - - if (issue_ioctl == True) { - if (metaioctl(MD_IOCPROBE_DEV, iocp, &(iocp->mde), NULL) != 0) - retval = -1; - } - - Free((void *)(uintptr_t)iocp->mnum_list); - return (retval); -} -/* - * - * - remove p from nlp list - * - put it on the toplp list. - * - update the p to the next element - */ - -void -add_to_list(mdnamelist_t **curpp, mdnamelist_t **prevpp, - mdnamelist_t **newlpp) -{ - mdnamelist_t *p, *prevp, *nlp; - - p = *curpp; - prevp = *prevpp; - nlp = *newlpp; - - if (prevp == p) { - /* if first element reset prevp */ - prevp = p->next; - p->next = nlp; - nlp = p; - p = prevp; - } else { - prevp->next = p->next; - p->next = nlp; - nlp = p; - p = prevp->next; - } - *curpp = p; - *prevpp = prevp; - *newlpp = nlp; -} -/* - * Scans the given list of metadeivces and returns a list of top level - * metadevices. - * Note: The orignal list is not valid at the end and is set to NULL. - */ - -int -get_toplevel_mds(mdnamelist_t **lpp, mdnamelist_t **top_pp, boolean_e verbose) -{ - mdnamelist_t *p, *prevp, *toplp; - int ntopmd, i; - md_common_t *mdp; - md_error_t e = mdnullerror; - - i = ntopmd = 0; - prevp = p = *lpp; - toplp = NULL; - - while (p) { - if ((mdp = meta_get_unit(sp, p->namep, &e)) == NULL) { - mdclrerror(&e); - if (verbose == True) - monitord_print(6, gettext( - "......error on (%d)%s\n"), i, - p->namep->devicesname); - prevp = p; - p = p->next; - continue; - } - - if (mdp->parent == MD_NO_PARENT) { - /* increment the top level md count. */ - ntopmd++; - add_to_list(&p, &prevp, &toplp); - } else { - prevp = p; - p = p->next; - } - i++; - } - - *lpp = NULL; - *top_pp = toplp; - - return (ntopmd); -} - -int -get_namelist(mdnamelist_t **transdevlist, mdnamelist_t **devlist, - char *dev_type) -{ - mdnamelist_t *np, *prevp; - md_error_t e = mdnullerror; - char *type_name; - int i = 0; - - prevp = np = *transdevlist; - while (np) { - if ((type_name = metagetmiscname(np->namep, &e)) == NULL) { - *devlist = NULL; - mdclrerror(&e); - return (-1); - } - if (strcmp(type_name, dev_type) == 0) { - /* move it to the devlist */ - add_to_list(&np, &prevp, devlist); - i++; - } else { - prevp = np; - np = np->next; - } - } - return (i); -} - - -mdnamelist_t * -create_nlp() -{ - mdnamelist_t *np; - - if (np = (mdnamelist_t *)malloc(sizeof (mdnamelist_t))) { - np->next = NULL; - return (np); - } else { - /* error condition below */ - monitord_print(0, gettext( - "create_nlp: malloc failed\n")); - monitord_exit(errno); - } - return (0); -} - -/* - * Create a list of metadevices associated with trans. top_pp points to - * this list. The number of components in the list are also returned. - */ -int -create_trans_compslist(mdnamelist_t **lpp, mdnamelist_t **top_pp, - boolean_e verbose) -{ - mdnamelist_t *p, *tailp, *toplp, *newlp; - int ntoptrans; - md_error_t e = mdnullerror; - md_trans_t *tp; - - ntoptrans = 0; - p = *lpp; - tailp = toplp = NULL; - /* - * Scan the current list of trans devices. From that - * extract all the lower level metadevices and put them on - * toplp list. - */ - - while (p) { - if (tp = meta_get_trans(sp, p->namep, &e)) { - /* - * Check the master and log devices to see if they - * are metadevices - */ - if (metaismeta(tp->masternamep)) { - if (verbose == True) - monitord_print(6, gettext( - "master metadevice\n")); - /* get a mdnamelist_t. */ - newlp = create_nlp(); - newlp->namep = tp->masternamep; - if (toplp == NULL) { - toplp = tailp = newlp; - } else { - tailp->next = newlp; - tailp = newlp; - } - ntoptrans++; - } - - if (tp->lognamep && metaismeta(tp->lognamep)) { - if (verbose == True) - monitord_print(6, gettext( - "log metadevice\n")); - newlp = create_nlp(); - newlp->namep = tp->lognamep; - if (toplp == NULL) { - toplp = tailp = newlp; - } else { - tailp->next = newlp; - tailp = newlp; - } - ntoptrans++; - } - p = p->next; - } else { - mdclrerror(&e); - } - } - *top_pp = toplp; - return (ntoptrans); -} - -void -probe_mirror_devs(boolean_e verbose) -{ - mdnamelist_t *nlp, *toplp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - - if (meta_get_mirror_names(sp, &nlp, 0, &e) > 0) { - /* - * We have some mirrors to probe - * get a list of top-level mirrors - */ - - cnt = get_toplevel_mds(&nlp, &toplp, verbose); - if (cnt && (md_probe_ioctl(toplp, cnt, - MD_MIRROR, verbose) < 0)) - monitord_print(0, gettext( - "probe_mirror_devs: " - "mirror components %d ioctl error\n"), - cnt); - - } else { - mdclrerror(&e); - } - - metafreenamelist(nlp); - metafreenamelist(toplp); -} - -void -probe_raid_devs(boolean_e verbose) -{ - mdnamelist_t *nlp, *toplp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - - if (meta_get_raid_names(sp, &nlp, 0, &e) > 0) { - /* - * We have some mirrors to probe - * get a list of top-level mirrors - */ - - cnt = get_toplevel_mds(&nlp, &toplp, verbose); - - if (cnt && (md_probe_ioctl(toplp, cnt, - MD_RAID, verbose) < 0)) - monitord_print(0, gettext( - "probe_raid_devs: " - "RAID-5 components %d ioctl error\n"), - cnt); - } else { - mdclrerror(&e); - } - - metafreenamelist(nlp); - metafreenamelist(toplp); -} - -/* - * Trans probes are different. -- so whats new. - * we separate out the master and log device and then issue the - * probe calls. - * Since the underlying device could be disk, stripe, RAID or miror, - * we have to sort them out and then call the ioctl for each. - */ - -void -probe_trans_devs(boolean_e verbose) -{ - mdnamelist_t *nlp, *toplp; - mdnamelist_t *trans_raidlp, *trans_mmlp, *trans_stripelp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - trans_raidlp = trans_mmlp = trans_stripelp = NULL; - - if (meta_get_trans_names(sp, &nlp, 0, &e) > 0) { - /* - * get a list of master and log metadevices. - */ - - cnt = create_trans_compslist(&nlp, &toplp, verbose); - if (verbose == True) { - int i; - - for (i = 0, nlp = toplp; i < cnt; i++) { - monitord_print(6, gettext( - "tran: underlying drv %s\n"), - (nlp->namep)->cname); - nlp = nlp->next; - } - } - - /* underlying RAID-5 components */ - - cnt = get_namelist(&toplp, &trans_raidlp, MD_RAID); - if ((cnt > 0) && (md_probe_ioctl(trans_raidlp, cnt, - MD_RAID, verbose) < 0)) - monitord_print(0, gettext( - "probe_trans_devs: " - "RAID-5 components %d ioctl error\n"), - cnt); - metafreenamelist(trans_raidlp); - - /* underlying mirror components */ - - cnt = get_namelist(&toplp, &trans_mmlp, MD_MIRROR); - - if ((cnt > 0) && (md_probe_ioctl(trans_mmlp, cnt, - MD_MIRROR, verbose) < 0)) - monitord_print(0, gettext( - "probe_trans_devs: " - "mirror components %d ioctl error\n"), - cnt); - metafreenamelist(trans_mmlp); - - /* underlying stripe components */ - - cnt = get_namelist(&toplp, &trans_stripelp, MD_STRIPE); - if ((cnt > 0) && (md_probe_ioctl(trans_stripelp, cnt, - MD_STRIPE, verbose) < 0)) - monitord_print(0, gettext( - "probe_trans_devs: " - "stripe components %d ioctl error\n"), - cnt); - - metafreenamelist(trans_stripelp); - metafreenamelist(nlp); - } else { - mdclrerror(&e); - } -} - -/* - * probe hot spares. This is differs from other approaches since - * there are no read/write routines through md. We check at the physical - * component level and then delete it if its bad. - */ - -void -probe_hotspare_devs(boolean_e verbose) -{ - mdhspnamelist_t *hspnlp = NULL; - mdhspnamelist_t *p; - md_hsp_t *hspp; - md_error_t e = mdnullerror; - - if (meta_get_hsp_names(sp, &hspnlp, 0, &e) <= 0) { - mdclrerror(&e); - return; - } - - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if (verbose == True) - monitord_print(6, "%s %s\n", gettext("name"), - hspnp->hspname); - - if ((hspp = meta_get_hsp(sp, hspnp, &e)) == NULL) - continue; - - if (hspp->hotspares.hotspares_len != 0) { - if (verbose == True) - monitord_print(6, " %u hotspares\n", - hspp->hotspares.hotspares_len); - delete_hotspares_impl(hspnp, hspp, verbose); - } - } - mdclrerror(&e); - metafreehspnamelist(hspnlp); -} diff --git a/usr/src/cmd/lvm/md_monitord/sparc/Makefile b/usr/src/cmd/lvm/md_monitord/sparc/Makefile deleted file mode 100644 index f9db2aa8b55a..000000000000 --- a/usr/src/cmd/lvm/md_monitord/sparc/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996, 2000-2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Makefile for logical volume management -# - -PROG= mdmonitord - -OBJS= probedev.o \ - md_monitord.o -SRCS= $(OBJS:%.o=../%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta - -DEFINES += -DDEBUG -CFLAGS += ${DEFINES} - -# -# -lint := LINTFLAGS += -m - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJS) - $(LINK.c) -o $@ $(OBJS) $(LDLIBS) - $(POST_PROCESS) - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${SRCS} - -clean: - ${RM} ${OBJS} *.o - -include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/cmd/lvm/md_monitord/svc-mdmonitor.sh b/usr/src/cmd/lvm/md_monitord/svc-mdmonitor.sh deleted file mode 100644 index 1d0a74653181..000000000000 --- a/usr/src/cmd/lvm/md_monitord/svc-mdmonitor.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Start mdmonitord. - -MDMONITORD=/usr/sbin/mdmonitord - -. /lib/svc/share/smf_include.sh - -if [ ! -x $MDMONITORD ]; then - echo "$MDMONITORD is missing or not executable." - exit $SMF_EXIT_ERR_CONFIG -fi - -$MDMONITORD -error=$? -case $error in -0) exit 0 - ;; - -*) echo "Could not start $MDMONITORD. Error $error." - exit $SMF_EXIT_ERR_FATAL - ;; -esac diff --git a/usr/src/cmd/lvm/metassist/Makefile b/usr/src/cmd/lvm/metassist/Makefile deleted file mode 100644 index 418bdeb5b967..000000000000 --- a/usr/src/cmd/lvm/metassist/Makefile +++ /dev/null @@ -1,87 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -METASSIST_TOPLEVEL = . - -PROG = metassist - -# Subdirectories with message catalogs -POSUBDIRS = \ - common \ - xml \ - layout \ - controller - -# For message catalog -POFILES = $(POSUBDIRS:%=%/%p.po) - -# Subdirectories that must be made -SUBDIRS = \ - scripts \ - sysfiles \ - $(POSUBDIRS) - -SUBOBJS = \ - common/volume_defaults.o \ - common/volume_devconfig.o \ - common/volume_dlist.o \ - common/volume_error.o \ - common/volume_nvpair.o \ - common/volume_output.o \ - common/volume_request.o \ - common/volume_string.o \ - xml/xml_convert.o \ - layout/layout.o \ - layout/layout_concat.o \ - layout/layout_device_cache.o \ - layout/layout_device_util.o \ - layout/layout_discovery.o \ - layout/layout_dlist_util.o \ - layout/layout_hsp.o \ - layout/layout_messages.o \ - layout/layout_mirror.o \ - layout/layout_request.o \ - layout/layout_slice.o \ - layout/layout_stripe.o \ - layout/layout_svm_util.o \ - layout/layout_validate.o \ - controller/getopt_ext.o \ - controller/metassist.o - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -LDLIBS += -ldiskmgt -lmeta -lnvpair -lxml2 -lxslt -lm - -POFILE = metassistp.po - -include $(METASSIST_TOPLEVEL)/Makefile.targ - -# Build master .po file from subdirs' .po files -$(POFILE): $(POSUBDIRS) .WAIT $(POFILES) - $(BUILDPO.pofiles) diff --git a/usr/src/cmd/lvm/metassist/Makefile.env b/usr/src/cmd/lvm/metassist/Makefile.env deleted file mode 100644 index 77cc84aaa5f4..000000000000 --- a/usr/src/cmd/lvm/metassist/Makefile.env +++ /dev/null @@ -1,32 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -# Define DEBUG when running nightly DEBUG build -$(NOT_RELEASE_BUILD) CPPFLAGS += -DDEBUG -CERRWARN += -_gcc=-Wno-switch -CERRWARN += -_gcc=-Wno-type-limits -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-value diff --git a/usr/src/cmd/lvm/metassist/Makefile.targ b/usr/src/cmd/lvm/metassist/Makefile.targ deleted file mode 100644 index 2d2744146b6f..000000000000 --- a/usr/src/cmd/lvm/metassist/Makefile.targ +++ /dev/null @@ -1,108 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -# Suffixes for files that flag a particular check -SUFFIX_LINT = .lint - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -catalog := TARGET = catalog -check := TARGET = check -cstyle := TARGET = cstyle -hdrchk := TARGET = hdrchk -lint := TARGET = lint - -.KEEP_STATE: - -all: ${SUBDIRS} ${OBJS} .WAIT ${PROG} - -# Create the binary -$(PROG): $(OBJS) $(SUBOBJS) - $(LINK.c) $(OBJS) $(SUBOBJS) -o $@ $(LDLIBS) - $(POST_PROCESS) - -# Make objects in subdirectories -$(SUBOBJS) $(POFILES): - @cd $(@D); ${MAKE} $(@F) - -ROOTXML= $(ROOTSHLIB)/xml - -# Rule/definitions for DTDs -ROOTXMLDTD= $(ROOTXML)/dtd -ROOTXMLDTDFILES= $(DTDFILES:%=$(ROOTXMLDTD)/%) -$(ROOTXMLDTDFILES) := FILEMODE = 0444 - -$(ROOTXMLDTD)/%: % - $(INS.file) - -# Rule/definitions for XSL style sheets -ROOTXMLSTYLE= $(ROOTXML)/style -ROOTXMLSTYLEFILES= $(STYLEFILES:%=$(ROOTXMLSTYLE)/%) -$(ROOTXMLSTYLEFILES) := FILEMODE = 0444 - -$(ROOTXMLSTYLE)/%: % - $(INS.file) - -# Install recursively -install: all .WAIT \ - ${SUBDIRS} \ - $(ROOTUSRSBINPROG) \ - $(ROOTETCDEFAULTFILES) \ - $(ROOTXMLDTDFILES) \ - $(ROOTXMLSTYLEFILES) - -# Pattern-matching rule for lint -%$(SUFFIX_LINT): % - ${LINT.c} -I. ${INCLUDES} -y -c $< && touch $@ - -# Run lint on all source files -lint: ${SUBDIRS} $(SRCS:%=%$(SUFFIX_LINT)) - -CSTYLE_FLAGS= -Pp -HDRCHK_FLAGS= -a - -cstyle: ${SUBDIRS} - -hdrchk: ${SUBDIRS} - -check: cstyle hdrchk - -clobber: ${SUBDIRS} - -clean: ${SUBDIRS} - $(RM) *.o *.ln *.i *.lint $(CLEANFILES) - -catalog: $(POFILE) - -${SUBDIRS}: FRC - @cd $@; pwd; ${MAKE} ${TARGET} - -FRC: - -# Included for message catalog handling -include $(SRC)/Makefile.msg.targ -include $(METASSIST_TOPLEVEL)/../../Makefile.targ diff --git a/usr/src/cmd/lvm/metassist/common/Makefile b/usr/src/cmd/lvm/metassist/common/Makefile deleted file mode 100644 index 5fdee448f96e..000000000000 --- a/usr/src/cmd/lvm/metassist/common/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -METASSIST_TOPLEVEL = .. - -SRCS= \ - volume_defaults.c \ - volume_devconfig.c \ - volume_dlist.c \ - volume_error.c \ - volume_nvpair.c \ - volume_output.c \ - volume_request.c \ - volume_string.c - -OBJS = $(SRCS:%.c=%.o) -HDRS = $(SRCS:%.c=%.h) -MSGFILES = $(SRCS:%.c=%.i) - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -INCLUDES += -I../../../../lib/lvm/libmeta/common/hdrs -CFLAGS += $(INCLUDES) - -POFILE = commonp.po - -include $(METASSIST_TOPLEVEL)/Makefile.targ - -# Build .po file from message files -$(POFILE): $(MSGFILES) - $(BUILDPO.msgfiles) - -cstyle: - $(CSTYLE) $(CSTYLE_FLAGS) $(SRCS) $(HDRS) - -hdrchk: - $(HDRCHK) $(HDRCHK_FLAGS) $(HDRS) diff --git a/usr/src/cmd/lvm/metassist/common/volume_defaults.c b/usr/src/cmd/lvm/metassist/common/volume_defaults.c deleted file mode 100644 index cb6909f77fd2..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_defaults.c +++ /dev/null @@ -1,1875 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include "volume_defaults.h" -#include "volume_error.h" - -/* - * Methods which manipulate a defaults_t struct - */ - -static int defaults_get_singleton_component( - defaults_t *defaults, char *disksetname, - component_type_t type, devconfig_t **component, boolean_t create); - -/* - * Constructor: Create a defaults_t struct populated with default - * values. This defaults_t must be freed. - * - * @param defaults - * RETURN: a pointer to a new defaults_t - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -new_defaults( - defaults_t **defaults) -{ - devconfig_t *diskset; - int error = 0; - - *defaults = (defaults_t *)calloc(1, sizeof (defaults_t)); - if (*defaults == NULL) { - volume_set_error(gettext("new_defaults calloc() failed")); - return (-1); - } - - /* - * Create initial "global" (disk set-independent) defaults, as - * a devconfig_t of type disk set with NULL name - */ - if ((error = new_devconfig(&diskset, TYPE_DISKSET)) != 0) { - free_defaults(*defaults); - return (error); - } - - /* Append global defaults disk set to disksets */ - defaults_set_disksets( - *defaults, dlist_append(dlist_new_item(diskset), - defaults_get_disksets(*defaults), AT_TAIL)); - - /* Set defaults */ - if ((error = defaults_set_mirror_nsubs( - *defaults, NULL, DEFAULT_MIRROR_NSUBS)) != 0 || - - (error = defaults_set_mirror_read( - *defaults, NULL, DEFAULT_MIRROR_READ)) != 0 || - - (error = defaults_set_mirror_write( - *defaults, NULL, DEFAULT_MIRROR_WRITE)) != 0 || - - (error = defaults_set_mirror_pass( - *defaults, NULL, DEFAULT_MIRROR_PASS)) != 0 || - - (error = defaults_set_mirror_usehsp( - *defaults, NULL, DEFAULT_MIRROR_USEHSP)) != 0 || - - (error = defaults_set_concat_usehsp( - *defaults, NULL, DEFAULT_CONCAT_USEHSP)) != 0 || - - (error = defaults_set_stripe_interlace( - *defaults, NULL, DEFAULT_STRIPE_INTERLACE)) != 0 || - - (error = defaults_set_stripe_mincomp( - *defaults, NULL, DEFAULT_STRIPE_MINCOMP)) != 0 || - - (error = defaults_set_stripe_maxcomp( - *defaults, NULL, DEFAULT_STRIPE_MAXCOMP)) != 0 || - - (error = defaults_set_stripe_usehsp( - *defaults, NULL, DEFAULT_STRIPE_USEHSP)) != 0 || - - (error = defaults_set_volume_redundancy_level( - *defaults, NULL, DEFAULT_VOLUME_REDUND_LEVEL)) != 0 || - - (error = defaults_set_volume_npaths( - *defaults, NULL, DEFAULT_VOLUME_NPATHS)) != 0 || - - (error = defaults_set_volume_usehsp( - *defaults, NULL, DEFAULT_VOLUME_USEHSP)) != 0) { - - free_defaults(*defaults); - return (error); - } - - return (0); -} - -/* - * Free memory (recursively) allocated to a defaults_t struct - * - * @param arg - * pointer to the defaults_t struct to free - */ -void -free_defaults( - void *arg) -{ - defaults_t *defaults = (defaults_t *)arg; - - if (defaults == NULL) { - return; - } - - /* Free the disksets */ - if (defaults->disksets != NULL) { - dlist_free_items(defaults->disksets, free_devconfig); - } - - /* Free the devconfig itself */ - free(defaults); -} - -/* - * Set list of diskset specific defaults - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param disksets - * a dlist_t representing the defaults for specific - * named disk sets - */ -void -defaults_set_disksets( - defaults_t *defaults, - dlist_t *disksets) -{ - defaults->disksets = disksets; -} - -/* - * Get list of diskset specific defaults - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @return a dlist_t representing the defaults for specific - * named disk sets - */ -dlist_t * -defaults_get_disksets( - defaults_t *defaults) -{ - return (defaults->disksets); -} - -/* - * Get a disk set with the given name from the given defaults_t - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param name - * the name of the disk set whose defaults to retrieve, - * or NULL to get the defaults for all disk sets - * - * @param diskset - * RETURN: defaults for the given named disk set, or - * defaults for all disk sets if name is NULL - * - * @return ENOENT - * if the named disk set does not exist - * - * @return 0 - * if the named disk set exists - */ -int -defaults_get_diskset_by_name( - defaults_t *defaults, - char *name, - devconfig_t **diskset) -{ - dlist_t *list; - *diskset = NULL; - - /* Get list of disk sets */ - list = defaults_get_disksets(defaults); - if (list != NULL) { - - /* For each disk set-specific defaults... */ - for (; list != NULL; list = list->next) { - - char *dname = NULL; - devconfig_t *d = (devconfig_t *)list->obj; - - /* Get the name if this disk set */ - devconfig_get_name(d, &dname); - - /* Do the names match? */ - if ( - /* Global defaults disk set */ - (name == NULL && dname == NULL) || - - /* Named disk set */ - (name != NULL && dname != NULL && - strcmp(name, dname) == 0)) { - - *diskset = d; - break; - } - } - } - - /* Diskset doesn't exist */ - if (*diskset == NULL) { - return (ENOENT); - } - - return (0); -} - -/* - * Get the first component of the given type from the given disk set. - * If not found, create the component if requested. - * - * @return ENOENT - * if the given disk set does not exist, or it exists, - * but the requested component does not exist under it - * and its creation was not requested - * - * @return 0 - * if the requested component exists or was created - * - * @return non-zero - * if the requested component does not exist and could - * not be created - */ -static int -defaults_get_singleton_component( - defaults_t *defaults, - char *disksetname, - component_type_t type, - devconfig_t **component, - boolean_t create) -{ - int error; - devconfig_t *diskset; - - /* Get the disk set referred to */ - if ((error = defaults_get_diskset_by_name( - defaults, disksetname, &diskset)) != 0) { - - volume_set_error( - gettext("could not get defaults for disk set %s"), - disksetname == NULL ? gettext("") : disksetname); - - return (error); - } - - /* - * Get the singleton component under this disk set, create if - * requested - */ - return (devconfig_get_component(diskset, type, component, create)); -} - -/* - * Set name of the the default HSP to use - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param name - * the name of the default HSP to use - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_hsp_name( - defaults_t *defaults, - char *diskset, - char *name) -{ - devconfig_t *hsp = NULL; - int error = 0; - - /* Get/create singleton HSP element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_HSP, &hsp, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the name attribute */ - return (devconfig_set_hsp_name(hsp, name)); -} - -/* - * Get the name of the default HSP to use - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param name - * RETURN: the name of the default HSP to use - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_hsp_name( - defaults_t *defaults, - char *diskset, - char **name) -{ - char *disksets[2]; - devconfig_t *hsp; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton HSP element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_HSP, &hsp, FALSE); - - switch (error) { - /* HSP found for this disk set */ - case 0: - /* Get the nsubs attribute */ - if ((error = devconfig_get_name(hsp, name)) == 0) { - /* nsubs attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* HSP not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or HSP couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default number of submirrors for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default number of submirrors - * for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_mirror_nsubs( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *mirror = NULL; - int error = 0; - - /* Get/create singleton mirror element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_MIRROR, &mirror, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the nsubs attribute */ - return (devconfig_set_mirror_nsubs(mirror, val)); -} - -/* - * Get the default number of submirrors for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default number of submirrors for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_mirror_nsubs( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *mirror; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton mirror element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_MIRROR, &mirror, FALSE); - - switch (error) { - /* mirror found for this disk set */ - case 0: - /* Get the nsubs attribute */ - if ((error = devconfig_get_mirror_nsubs( - mirror, val)) == 0) { - /* nsubs attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* mirror not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or mirror couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default read strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default read strategy for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_mirror_read( - defaults_t *defaults, - char *diskset, - mirror_read_strategy_t val) -{ - devconfig_t *mirror = NULL; - int error = 0; - - /* Get/create singleton mirror element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_MIRROR, &mirror, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the read attribute */ - return (devconfig_set_mirror_read(mirror, val)); -} - -/* - * Get the default read strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default read strategy for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_mirror_read( - defaults_t *defaults, - char *diskset, - mirror_read_strategy_t *val) -{ - char *disksets[2]; - devconfig_t *mirror; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton mirror element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_MIRROR, &mirror, FALSE); - - switch (error) { - /* mirror found for this disk set */ - case 0: - /* Get the read attribute */ - if ((error = devconfig_get_mirror_read(mirror, val)) == 0) { - /* read attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* mirror not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or mirror couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default write strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default write strategy for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_mirror_write( - defaults_t *defaults, - char *diskset, - mirror_write_strategy_t val) -{ - devconfig_t *mirror = NULL; - int error = 0; - - /* Get/create singleton mirror element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_MIRROR, &mirror, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the write attribute */ - return (devconfig_set_mirror_write(mirror, val)); -} - -/* - * Get the default write strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default write strategy for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_mirror_write( - defaults_t *defaults, - char *diskset, - mirror_write_strategy_t *val) -{ - char *disksets[2]; - devconfig_t *mirror; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton mirror element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_MIRROR, &mirror, FALSE); - - switch (error) { - /* mirror found for this disk set */ - case 0: - /* Get the write attribute */ - if ((error = devconfig_get_mirror_write( - mirror, val)) == 0) { - /* write attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* mirror not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or mirror couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default resync pass for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default resync pass for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_mirror_pass( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *mirror = NULL; - int error = 0; - - /* Get/create singleton mirror element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_MIRROR, &mirror, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the pass attribute */ - return (devconfig_set_mirror_pass(mirror, val)); -} - -/* - * Get the default resync pass for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default resync pass for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_mirror_pass( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *mirror; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton mirror element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_MIRROR, &mirror, FALSE); - - switch (error) { - /* mirror found for this disk set */ - case 0: - /* Get the pass attribute */ - if ((error = devconfig_get_mirror_pass(mirror, val)) == 0) { - /* pass attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* mirror not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or mirror couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default HSP creation flag for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_mirror_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t val) -{ - devconfig_t *mirror = NULL; - int error = 0; - - /* Get/create singleton mirror element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_MIRROR, &mirror, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the usehsp attribute */ - return (devconfig_set_volume_usehsp(mirror, val)); -} - -/* - * Get the default HSP creation flag for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_mirror_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t *val) -{ - char *disksets[2]; - devconfig_t *mirror; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton mirror element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_MIRROR, &mirror, FALSE); - - switch (error) { - /* mirror found for this disk set */ - case 0: - /* Get the usehsp attribute */ - if ((error = devconfig_get_volume_usehsp( - mirror, val)) == 0) { - /* usehsp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* mirror not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or mirror couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default HSP creation flag for concatenated volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * concatenated volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_concat_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t val) -{ - devconfig_t *concat = NULL; - int error = 0; - - /* Get/create singleton concat element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_CONCAT, &concat, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the usehsp attribute */ - return (devconfig_set_volume_usehsp(concat, val)); -} - -/* - * Get the default HSP creation flag for concatenated volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for concatenated - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_concat_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t *val) -{ - char *disksets[2]; - devconfig_t *concat; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton concat element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_CONCAT, &concat, FALSE); - - switch (error) { - /* concat found for this disk set */ - case 0: - /* Get the usehsp attribute */ - if ((error = devconfig_get_volume_usehsp( - concat, val)) == 0) { - /* usehsp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* concat not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or concat couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default minimum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default minimum number of - * components for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_stripe_mincomp( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *stripe = NULL; - int error = 0; - - /* Get/create singleton stripe element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_STRIPE, &stripe, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the mincomp attribute */ - return (devconfig_set_stripe_mincomp(stripe, val)); -} - -/* - * Get the default minimum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default minimum number of components for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_stripe_mincomp( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *stripe; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton stripe element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_STRIPE, &stripe, FALSE); - - switch (error) { - /* stripe found for this disk set */ - case 0: - /* Get the mincomp attribute */ - if ((error = devconfig_get_stripe_mincomp( - stripe, val)) == 0) { - /* mincomp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* stripe not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or stripe couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default maximum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default maximum number of - * components for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_stripe_maxcomp( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *stripe = NULL; - int error = 0; - - /* Get/create singleton stripe element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_STRIPE, &stripe, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the maxcomp attribute */ - return (devconfig_set_stripe_maxcomp(stripe, val)); -} - -/* - * Get the default maximum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default maximum number of components for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_stripe_maxcomp( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *stripe; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton stripe element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_STRIPE, &stripe, FALSE); - - switch (error) { - /* stripe found for this disk set */ - case 0: - /* Get the maxcomp attribute */ - if ((error = devconfig_get_stripe_maxcomp( - stripe, val)) == 0) { - /* maxcomp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* stripe not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or stripe couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default interlace for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default interlace for striped - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_stripe_interlace( - defaults_t *defaults, - char *diskset, - uint64_t val) -{ - devconfig_t *stripe = NULL; - int error = 0; - - /* Get/create singleton stripe element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_STRIPE, &stripe, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the interlace attribute */ - return (devconfig_set_stripe_interlace(stripe, val)); -} - -/* - * Get the default interlace for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default interlace for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_stripe_interlace( - defaults_t *defaults, - char *diskset, - uint64_t *val) -{ - char *disksets[2]; - devconfig_t *stripe; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton stripe element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_STRIPE, &stripe, FALSE); - - switch (error) { - /* stripe found for this disk set */ - case 0: - /* Get the interlace attribute */ - if ((error = devconfig_get_stripe_interlace( - stripe, val)) == 0) { - /* interlace attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* stripe not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or stripe couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default HSP creation flag for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_stripe_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t val) -{ - devconfig_t *stripe = NULL; - int error = 0; - - /* Get/create singleton stripe element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_STRIPE, &stripe, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the usehsp attribute */ - return (devconfig_set_volume_usehsp(stripe, val)); -} - -/* - * Get the default HSP creation flag for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for striped - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_stripe_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t *val) -{ - char *disksets[2]; - devconfig_t *stripe; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton stripe element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_STRIPE, &stripe, FALSE); - - switch (error) { - /* stripe found for this disk set */ - case 0: - /* Get the usehsp attribute */ - if ((error = devconfig_get_volume_usehsp( - stripe, val)) == 0) { - /* usehsp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* stripe not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or stripe couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default redundancy level for generic volumes. - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * If 0, a stripe will be created by default. If > 0, a - * mirror with this number of submirrors will be created - * by default. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_volume_redundancy_level( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *volume = NULL; - int error = 0; - - /* Get/create singleton volume element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_VOLUME, &volume, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the redundancy level */ - return (devconfig_set_volume_redundancy_level(volume, val)); -} - -/* - * Get the default redundancy level for generic volumes. - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default redundancy level for generic - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_volume_redundancy_level( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *volume; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton volume element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_VOLUME, &volume, FALSE); - - switch (error) { - /* volume found for this disk set */ - case 0: - /* Get the redundancy level */ - if ((error = devconfig_get_volume_redundancy_level( - volume, val)) == 0) { - /* redundancy level found */ - return (0); - } - - /* FALLTHROUGH */ - - /* volume not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or volume couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default number of data paths for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default number of data paths - * for generic volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_volume_npaths( - defaults_t *defaults, - char *diskset, - uint16_t val) -{ - devconfig_t *volume = NULL; - int error = 0; - - /* Get/create singleton volume element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_VOLUME, &volume, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the npaths attribute */ - return (devconfig_set_volume_npaths(volume, val)); -} - -/* - * Get the default number of data paths for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default number of data paths for generic - * volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_volume_npaths( - defaults_t *defaults, - char *diskset, - uint16_t *val) -{ - char *disksets[2]; - devconfig_t *volume; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton volume element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_VOLUME, &volume, FALSE); - - switch (error) { - /* volume found for this disk set */ - case 0: - /* Get the npaths attribute */ - if ((error = devconfig_get_volume_npaths( - volume, val)) == 0) { - /* npaths attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* volume not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or volume couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} - -/* - * Set the default HSP creation flag for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * generic volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_set_volume_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t val) -{ - devconfig_t *volume = NULL; - int error = 0; - - /* Get/create singleton volume element for this disk set */ - if ((error = defaults_get_singleton_component( - defaults, diskset, TYPE_VOLUME, &volume, TRUE)) != 0) { - /* volume_set_error already called */ - return (error); - } - - /* Set the usehsp attribute */ - return (devconfig_set_volume_usehsp(volume, val)); -} - -/* - * Get the default HSP creation flag for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for generic - * volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -defaults_get_volume_usehsp( - defaults_t *defaults, - char *diskset, - boolean_t *val) -{ - char *disksets[2]; - devconfig_t *volume; - int error; - int i = 0; - - /* Check both the given and global (NULL) disk sets for the value */ - disksets[0] = diskset; - disksets[1] = NULL; - do { - /* Get/create singleton volume element for this disk set */ - error = defaults_get_singleton_component( - defaults, disksets[i], TYPE_VOLUME, &volume, FALSE); - - switch (error) { - /* volume found for this disk set */ - case 0: - /* Get the usehsp attribute */ - if ((error = devconfig_get_volume_usehsp( - volume, val)) == 0) { - /* usehsp attribute found */ - return (0); - } - - /* FALLTHROUGH */ - - /* volume not found for this disk set */ - case ENOENT: - break; - - /* Invalid disk set, or volume couldn't be created */ - default: - /* volume_set_error already called */ - return (error); - } - - /* Stop after the global (NULL) disk set has been searched */ - } while (disksets[i++] != NULL); - - return (ENOENT); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_defaults.h b/usr/src/cmd/lvm/metassist/common/volume_defaults.h deleted file mode 100644 index 3d9cacacaa81..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_defaults.h +++ /dev/null @@ -1,853 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_DEFAULTS_H -#define _VOLUME_DEFAULTS_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" - -#define DEFAULT_MIRROR_NSUBS 2 -#define DEFAULT_MIRROR_READ MIRROR_READ_ROUNDROBIN -#define DEFAULT_MIRROR_WRITE MIRROR_WRITE_PARALLEL -#define DEFAULT_MIRROR_PASS 1 -#define DEFAULT_STRIPE_INTERLACE 1024 * 64 -#define DEFAULT_STRIPE_MINCOMP 3 -#define DEFAULT_STRIPE_MAXCOMP 10 -#define DEFAULT_VOLUME_REDUND_LEVEL 0 -#define DEFAULT_VOLUME_NPATHS 1 - -/* For consistency, these should all have the same value */ -#define DEFAULT_MIRROR_USEHSP FALSE -#define DEFAULT_CONCAT_USEHSP FALSE -#define DEFAULT_STRIPE_USEHSP FALSE -#define DEFAULT_VOLUME_USEHSP FALSE - -/* - * default_t - struct to hold layout defaults - */ -typedef struct defaults { - /* - * List of devconfig_t, each of which represents disk set- - * specific defaults. Each disk set has a name, except for - * the global set, whose name is NULL. - */ - dlist_t *disksets; -} defaults_t; - -/* - * Constructor: Create a defaults_t struct populated with default - * values. This defaults_t must be freed. - * - * @param defaults - * RETURN: a pointer to a new defaults_t - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int new_defaults(defaults_t **defaults); - -/* - * Free memory (recursively) allocated to a defaults_t struct - * - * @param arg - * pointer to the defaults_t struct to free - */ -extern void free_defaults(void *arg); - -/* - * Set list of diskset specific defaults - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param disksets - * a dlist_t representing the defaults for specific - * named disk sets - */ -extern void defaults_set_disksets(defaults_t *defaults, dlist_t *disksets); -/* - * Get list of diskset specific defaults - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @return a dlist_t representing the defaults for specific - * named disk sets - */ -extern dlist_t *defaults_get_disksets(defaults_t *defaults); - -/* - * Get a disk set with the given name from the given defaults_t - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param name - * the name of the disk set whose defaults to retrieve, - * or NULL to get the defaults for all disk sets - * - * @param diskset - * RETURN: defaults for the given named disk set, or - * defaults for all disk sets if name is NULL - * - * @return ENOENT - * if the named disk set does not exist - * - * @return 0 - * if the named disk set exists - */ - -extern int defaults_get_diskset_by_name( - defaults_t *defaults, char *name, devconfig_t **diskset); - -/* - * Set name of the the default HSP to use - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param name - * the name of the default HSP to use - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_hsp_name( - defaults_t *defaults, char *diskset, char *name); -/* - * Get the name of the default HSP to use - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param name - * RETURN: the name of the default HSP to use - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_hsp_name( - defaults_t *defaults, char *diskset, char **name); - -/* - * Set the default number of submirrors for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default number of submirrors - * for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_mirror_nsubs( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default number of submirrors for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default number of submirrors for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_mirror_nsubs( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default read strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default read strategy for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_mirror_read( - defaults_t *defaults, char *diskset, mirror_read_strategy_t val); -/* - * Get the default read strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default read strategy for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_mirror_read( - defaults_t *defaults, char *diskset, mirror_read_strategy_t *val); - -/* - * Set the default write strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default write strategy for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_mirror_write( - defaults_t *defaults, char *diskset, mirror_write_strategy_t val); -/* - * Get the default write strategy for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default write strategy for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_mirror_write( - defaults_t *defaults, char *diskset, mirror_write_strategy_t *val); - -/* - * Set the default resync pass for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default resync pass for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_mirror_pass( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default resync pass for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default resync pass for mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_mirror_pass( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default HSP creation flag for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * mirrored volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_mirror_usehsp( - defaults_t *defaults, char *diskset, boolean_t val); -/* - * Get the default HSP creation flag for mirrored volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for mirrored - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_mirror_usehsp( - defaults_t *defaults, char *diskset, boolean_t *val); - -/* - * Set the default HSP creation flag for concatenated volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * concatenated volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_concat_usehsp( - defaults_t *defaults, char *diskset, boolean_t val); -/* - * Get the default HSP creation flag for concatenated volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for concatenated - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_concat_usehsp( - defaults_t *defaults, char *diskset, boolean_t *val); - -/* - * Set the default minimum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default minimum number of - * components for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_stripe_mincomp( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default minimum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default minimum number of components for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_stripe_mincomp( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default maximum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default maximum number of - * components for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_stripe_maxcomp( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default maximum number of components for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default maximum number of components for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_stripe_maxcomp( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default interlace for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default interlace for striped - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_stripe_interlace( - defaults_t *defaults, char *diskset, uint64_t val); -/* - * Get the default interlace for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default interlace for striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_stripe_interlace( - defaults_t *defaults, char *diskset, uint64_t *val); - -/* - * Set the default HSP creation flag for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * striped volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_stripe_usehsp( - defaults_t *defaults, char *diskset, boolean_t val); -/* - * Get the default HSP creation flag for striped volumes - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for striped - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_stripe_usehsp( - defaults_t *defaults, char *diskset, boolean_t *val); - -/* - * Set the default redundancy level for generic volumes. - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * If 0, a stripe will be created by default. If > 0, a - * mirror with this number of submirrors will be created - * by default. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_volume_redundancy_level( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default redundancy level for generic volumes. - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default redundancy level for generic - * volumes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_volume_redundancy_level( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default number of data paths for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default number of data paths - * for generic volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_volume_npaths( - defaults_t *defaults, char *diskset, uint16_t val); -/* - * Get the default number of data paths for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default number of data paths for generic - * volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_volume_npaths( - defaults_t *defaults, char *diskset, uint16_t *val); - -/* - * Set the default HSP creation flag for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * the value to set as the default HSP creation flag for - * generic volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_set_volume_usehsp( - defaults_t *defaults, char *diskset, boolean_t val); -/* - * Get the default HSP creation flag for generic volume - * - * @param defaults - * a defaults_t hierarchy representing default settings - * for all disk sets and specific disk sets - * - * @param diskset - * the name of the disk set to which to apply this - * default setting, or NULL to apply default - * setting to all disk sets - * - * @param val - * RETURN: the default HSP creation flag for generic - * volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int defaults_get_volume_usehsp( - defaults_t *defaults, char *diskset, boolean_t *val); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_DEFAULTS_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_devconfig.c b/usr/src/cmd/lvm/metassist/common/volume_devconfig.c deleted file mode 100644 index 1146b8946648..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_devconfig.c +++ /dev/null @@ -1,1691 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "volume_devconfig.h" - -#include -#include -#include -#include "volume_nvpair.h" -#include "volume_error.h" -#include "volume_output.h" -#include "volume_string.h" - -/* - * Methods which manipulate a devconfig_t struct - */ - -/* - * Constructor: Create a devconfig_t struct. This devconfig_t must be - * freed with free_devconfig(). - * - * @param devconfig - * RETURN: a new devconfig_t - * - * @param type - * the type of devconfig_t to create - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -new_devconfig( - devconfig_t **devconfig, - component_type_t type) -{ - int error; - - *devconfig = (devconfig_t *)calloc(1, sizeof (devconfig_t)); - if (*devconfig == NULL) { - volume_set_error(gettext("new_devconfig() calloc() failed\n")); - return (-1); - } - - /* Create attribute list */ - if ((error = nvlist_alloc(&((*devconfig)->attributes), - NV_UNIQUE_NAME_TYPE, 0)) != 0) { - volume_set_error(gettext("devconfig_t nvlist_alloc() failed\n")); - free_devconfig(*devconfig); - return (error); - } - - if ((error = devconfig_set_type(*devconfig, type)) != 0) { - free_devconfig(*devconfig); - return (error); - } - - return (0); -} - -/* - * Free memory (recursively) allocated to a devconfig_t struct - * - * @param arg - * pointer to the devconfig_t to be freed - */ -void -free_devconfig( - void *arg) -{ - devconfig_t *devconfig = (devconfig_t *)arg; - - if (devconfig == NULL) { - return; - } - - /* Free the attributes nvlist */ - if (devconfig->attributes != NULL) { - nvlist_free(devconfig->attributes); - } - - /* Free available devices */ - if (devconfig->available != NULL) { - free_string_array(devconfig->available); - } - - /* Free unavailable devices */ - if (devconfig->unavailable != NULL) { - free_string_array(devconfig->unavailable); - } - - /* Free the components */ - if (devconfig->components != NULL) { - dlist_free_items(devconfig->components, free_devconfig); - } - - /* Free the devconfig itself */ - free(devconfig); -} - -/* - * Check the type of the given device. - * - * @param device - * the device whose type to check - * - * @param type - * the type of the device against which to compare - * - * @return B_TRUE if the device is of the given type, B_FALSE - * otherwise - */ -boolean_t -devconfig_isA( - devconfig_t *device, - component_type_t type) -{ - component_type_t curtype; - - if (device == NULL) { - return (B_FALSE); - } - - if (devconfig_get_type(device, &curtype) != 0) { - return (B_FALSE); - } - - if (curtype != type) { - return (B_FALSE); - } - - return (B_TRUE); -} - -/* - * Get the first component of the given type from the given - * devconfig_t. Create the component if create is B_TRUE. - * - * @return ENOENT - * if the requested component does not exist and its - * creation was not requested - * - * @return 0 - * if the requested component exists or was created - * - * @return non-zero - * if the requested component did not exist and could not - * be created - */ -int -devconfig_get_component( - devconfig_t *device, - component_type_t type, - devconfig_t **component, - boolean_t create) -{ - dlist_t *list; - int error = 0; - char *typestr = devconfig_type_to_str(type); - - oprintf(OUTPUT_DEBUG, gettext("Searching for singleton %s\n"), typestr); - - /* For each component of this device... */ - for (list = devconfig_get_components(device); - list != NULL; list = list->next) { - - *component = (devconfig_t *)list->obj; - - /* Is this subcomponent an instance of the given type? */ - if (*component != NULL && devconfig_isA(*component, type)) { - oprintf(OUTPUT_DEBUG, gettext("Found %s\n"), typestr); - return (0); - } - } - - /* No component found */ - error = ENOENT; - *component = NULL; - - oprintf(OUTPUT_DEBUG, gettext("%s not found\n"), typestr); - - if (create == B_TRUE) { - oprintf(OUTPUT_DEBUG, gettext("Creating %s\n"), typestr); - - /* - * An existing singleton component of the given type was - * not found under the given disk set. So, create one. - */ - if ((error = new_devconfig(component, type)) == 0) { - /* Attach new component to given device */ - devconfig_set_components( - device, dlist_append(dlist_new_item(*component), - devconfig_get_components(device), AT_TAIL)); - } - } - - return (error); -} - -/* - * Set the available devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to modify - * - * @param available - * A NULL-terminated array of device names - */ -void -devconfig_set_available( - devconfig_t *device, - char **available) -{ - device->available = available; -} - -/* - * Get the available devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return available - * A NULL-terminated array of device names - */ -char ** -devconfig_get_available( - devconfig_t *device) -{ - return (device->available); -} - -/* - * Set the unavailable devices which may not be used in creating this - * device - * - * @param device - * a devconfig_t representing the device to modify - * - * @param available - * A NULL-terminated array of device names - */ -void -devconfig_set_unavailable( - devconfig_t *device, - char **unavailable) -{ - device->unavailable = unavailable; -} - -/* - * Get the unavailable devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return unavailable - * A NULL-terminated array of device names - */ -char ** -devconfig_get_unavailable( - devconfig_t *device) -{ - return (device->unavailable); -} - -/* - * Set the subcomponent devices of a given device - * - * @param device - * a devconfig_t representing the device to examine - * - * @param components - * A dlist_t containing devconfig_t devices - */ -void -devconfig_set_components( - devconfig_t *device, - dlist_t *components) -{ - device->components = components; -} - -/* - * Get the subcomponent devices of a given device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return A dlist_t containing devconfig_t devices - */ -dlist_t * -devconfig_get_components( - devconfig_t *device) -{ - return (device->components); -} - -/* - * Set the device name - * - * @param device - * a devconfig_t representing the device to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_name( - devconfig_t *device, - char *name) -{ - return (set_string(device->attributes, ATTR_NAME, name)); -} - -/* - * Set the disk set name - * - * @param diskset - * a devconfig_t representing the diskset to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_diskset_name( - devconfig_t *diskset, - char *name) -{ - md_error_t error = mdnullerror; - - /* Verify syntax of disk set name */ - if (meta_set_checkname(name, &error)) { - volume_set_error(gettext("invalid disk set name: %s"), name); - return (-1); - } - - return (devconfig_set_name(diskset, name)); -} - -/* - * Set the device name - * - * @param hsp - * a devconfig_t representing the hsp to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_hsp_name( - devconfig_t *hsp, - char *name) -{ - /* Validate name */ - if (!is_hspname(name)) { - volume_set_error(gettext("invalid hot spare pool name: %s"), name); - return (-1); - } - - return (devconfig_set_name(hsp, name)); -} - -/* - * Set the device name - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_volume_name( - devconfig_t *volume, - char *name) -{ - /* Validate name */ - if (!is_metaname(name)) { - volume_set_error(gettext("invalid volume name: %s"), name); - return (-1); - } - - return (devconfig_set_name(volume, name)); -} - -/* - * Get the device name - * - * @param volume - * a devconfig_t representing the volume to examine - * - * @param name - * RETURN: the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_name( - devconfig_t *device, - char **name) -{ - int error = get_string(device->attributes, ATTR_NAME, name); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("device name not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the device type - * - * @param device - * a devconfig_t representing the device to modify - * - * @param type - * the value to set as the device type - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_type( - devconfig_t *device, - component_type_t type) -{ - return (set_uint16(device->attributes, ATTR_TYPE, (uint16_t)type)); -} - -/* - * Get the device type - * - * @param device - * a devconfig_t representing the device to examine - * - * @param type - * RETURN: the device type - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_type( - devconfig_t *device, - component_type_t *type) -{ - uint16_t val; - int error = get_uint16(device->attributes, ATTR_TYPE, &val); - - switch (error) { - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - case ENOENT: - volume_set_error(gettext("device type not set")); - error = ERR_ATTR_UNSET; - break; - - /* Success */ - case 0: - *type = (component_type_t)val; - } - - return (error); -} - -/* - * Set the device size (for volume, mirror, stripe, concat) in bytes - * - * Note that size in bytes in a 64-bit field cannot hold the size that - * can be accessed in a 16 byte CDB. Since CDBs operate on blocks, - * the max capacity is 2^73 bytes with 512 byte blocks. - * - * @param device - * a devconfig_t representing the device to modify - * - * @param size_in_bytes - * the value to set as the device size in bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_size( - devconfig_t *device, - uint64_t size_in_bytes) -{ - - /* Validate against limits */ - /* LINTED -- MIN_SIZE may be 0 */ - if (size_in_bytes < MIN_SIZE) { - volume_set_error(gettext("size (in bytes) too small: %llu"), - (unsigned long long)size_in_bytes); - return (-1); - } - - return (set_uint64(device->attributes, - ATTR_SIZEINBYTES, size_in_bytes)); -} - -/* - * Get the device size (for volume, mirror, stripe, concat) in bytes - * - * Note that size in bytes in a 64-bit field cannot hold the size that - * can be accessed in a 16 byte CDB. Since CDBs operate on blocks, - * the max capacity is 2^73 bytes with 512 byte blocks. - * - * @param device - * a devconfig_t representing the device to examine - * - * @param size_in_bytes - * RETURN: the device size in bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_size( - devconfig_t *device, - uint64_t *size_in_bytes) -{ - int error = get_uint64( - device->attributes, ATTR_SIZEINBYTES, size_in_bytes); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("size (in bytes) not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the device size in blocks - * - * @param device - * a devconfig_t representing the device to modify - * - * @param type - * the value to set as the device size in blocks - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_size_in_blocks( - devconfig_t *device, - uint64_t size_in_blocks) -{ - /* Validate against limits */ - /* LINTED -- MIN_SIZE_IN_BLOCKS may be 0 */ - if (size_in_blocks < MIN_SIZE_IN_BLOCKS) { - volume_set_error(gettext("size (in blocks) too small: %llu"), - (unsigned long long)size_in_blocks); - return (-1); - } - - return (set_uint64(device->attributes, - ATTR_SIZEINBLOCKS, size_in_blocks)); -} - -/* - * Get the device size in blocks - * - * @param device - * a devconfig_t representing the device to examine - * - * @param size_in_blocks - * RETURN: the device size in blocks - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_size_in_blocks( - devconfig_t *device, - uint64_t *size_in_blocks) -{ - int error = get_uint64( - device->attributes, ATTR_SIZEINBLOCKS, size_in_blocks); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("size (in blocks) not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the the slice index - * - * @param slice - * a devconfig_t representing the slice to modify - * - * @param index - * the value to set as the the slice index - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_slice_index( - devconfig_t *slice, - uint16_t index) -{ - return (set_uint16(slice->attributes, ATTR_SLICE_INDEX, index)); -} - -/* - * Get the slice index - * - * @param device - * a devconfig_t representing the device to examine - * - * @param index - * RETURN: the slice index - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_slice_index( - devconfig_t *slice, - uint16_t *index) -{ - int error = get_uint16(slice->attributes, ATTR_SLICE_INDEX, index); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("slice index not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the the slice start block - * - * @param slice - * a devconfig_t representing the slice to modify - * - * @param start_block - * the value to set as the the slice start block - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_slice_start_block( - devconfig_t *slice, - uint64_t start_block) -{ - return (set_uint64(slice->attributes, - ATTR_SLICE_STARTSECTOR, start_block)); -} - -/* - * Get the slice start block - * - * @param device - * a devconfig_t representing the device to examine - * - * @param start_block - * RETURN: the slice start block - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_slice_start_block( - devconfig_t *slice, - uint64_t *start_block) -{ - int error = get_uint64( - slice->attributes, ATTR_SLICE_STARTSECTOR, start_block); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("slice start block not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the number of subcomponents in mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param nsubs - * the value to set as the number of subcomponents in - * mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_mirror_nsubs( - devconfig_t *mirror, - uint16_t nsubs) -{ - /* Validate against limits */ - if (nsubs < 1 || nsubs > NMIRROR) { - volume_set_error( - gettext("number of submirrors (%d) out of valid range (%d-%d)"), - nsubs, 1, NMIRROR); - return (-1); - } - - return (set_uint16(mirror->attributes, ATTR_MIRROR_NSUBMIRRORS, nsubs)); -} - -/* - * Get number of subcomponents in mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param nsubs - * RETURN: number of subcomponents in mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_mirror_nsubs( - devconfig_t *mirror, - uint16_t *nsubs) -{ - int error = get_uint16( - mirror->attributes, ATTR_MIRROR_NSUBMIRRORS, nsubs); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("number or submirrors not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the read strategy for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param read - * the value to set as the read strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_mirror_read( - devconfig_t *mirror, - mirror_read_strategy_t read) -{ - return (set_uint16(mirror->attributes, - ATTR_MIRROR_READ, (uint16_t)read)); -} - -/* - * Get read strategy for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param read - * RETURN: read strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_mirror_read( - devconfig_t *mirror, - mirror_read_strategy_t *read) -{ - uint16_t val; - int error = get_uint16(mirror->attributes, ATTR_MIRROR_READ, &val); - - switch (error) { - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - case ENOENT: - volume_set_error(gettext("mirror read strategy not set")); - error = ERR_ATTR_UNSET; - break; - - /* Success */ - case 0: - *read = (mirror_read_strategy_t)val; - } - - return (error); -} - -/* - * Set the write strategy for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param write - * the value to set as the write strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_mirror_write( - devconfig_t *mirror, - mirror_write_strategy_t write) -{ - return (set_uint16(mirror->attributes, - ATTR_MIRROR_WRITE, (uint16_t)write)); -} - -/* - * Get write strategy for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param write - * RETURN: write strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_mirror_write( - devconfig_t *mirror, - mirror_write_strategy_t *write) -{ - uint16_t val; - int error = get_uint16(mirror->attributes, ATTR_MIRROR_WRITE, &val); - - switch (error) { - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - case ENOENT: - volume_set_error(gettext("mirror write strategy not set")); - error = ERR_ATTR_UNSET; - break; - - /* Success */ - case 0: - *write = (mirror_write_strategy_t)val; - } - - return (error); -} - -/* - * Set the resync pass for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param pass - * the value to set as the resync pass for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_mirror_pass( - devconfig_t *mirror, - uint16_t pass) -{ - /* Validate against max value */ - if (pass > MD_PASS_MAX) { - volume_set_error( - gettext("mirror pass number (%d) out of valid range (0-%d)"), - pass, MD_PASS_MAX); - return (-1); - } - - return (set_uint16(mirror->attributes, ATTR_MIRROR_PASSNUM, pass)); -} - -/* - * Get resync pass for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param pass - * RETURN: resync pass for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_mirror_pass( - devconfig_t *mirror, - uint16_t *pass) -{ - int error = get_uint16(mirror->attributes, ATTR_MIRROR_PASSNUM, pass); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("mirror pass number not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the minimum number of components in stripe - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param mincomp - * the value to set as the minimum number of components - * in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_stripe_mincomp( - devconfig_t *stripe, - uint16_t mincomp) -{ - /* Validate against minimum value */ - if (mincomp < MIN_NSTRIPE_COMP) { - volume_set_error(gettext( - "minimum stripe components (%d) below minimum allowable (%d)"), - mincomp, MIN_NSTRIPE_COMP); - return (-1); - } - - return (set_uint16(stripe->attributes, ATTR_STRIPE_MINCOMP, mincomp)); -} - -/* - * Get minimum number of components in stripe - * - * @param device - * a devconfig_t representing the device to examine - * - * @param mincomp - * RETURN: minimum number of components in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_stripe_mincomp( - devconfig_t *stripe, - uint16_t *mincomp) -{ - int error = get_uint16( - stripe->attributes, ATTR_STRIPE_MINCOMP, mincomp); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error( - gettext("minimum number of stripe components not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the maximum number of components in stripe - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param maxcomp - * the value to set as the maximum number of components - * in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_stripe_maxcomp( - devconfig_t *stripe, - uint16_t maxcomp) -{ - /* Validate against minimum value */ - if (maxcomp < MIN_NSTRIPE_COMP) { - volume_set_error(gettext( - "maximum stripe components (%d) below minimum allowable (%d)"), - maxcomp, MIN_NSTRIPE_COMP); - return (-1); - } - - return (set_uint16(stripe->attributes, ATTR_STRIPE_MAXCOMP, maxcomp)); -} - -/* - * Get maximum number of components in stripe - * - * @param device - * a devconfig_t representing the device to examine - * - * @param maxcomp - * RETURN: maximum number of components in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_stripe_maxcomp( - devconfig_t *stripe, - uint16_t *maxcomp) -{ - int error = get_uint16( - stripe->attributes, ATTR_STRIPE_MAXCOMP, maxcomp); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error( - gettext("maximum number of stripe components not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the stripe interlace - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param interlace - * the value to set as the stripe interlace - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_stripe_interlace( - devconfig_t *stripe, - uint64_t interlace) -{ - if (interlace < MININTERLACE || interlace > MAXINTERLACE) { - char *intstr = NULL; - char *minstr = NULL; - char *maxstr = NULL; - - /* Get string representations of interlaces */ - bytes_to_sizestr(interlace, &intstr, universal_units, B_FALSE); - bytes_to_sizestr(MININTERLACE, &minstr, universal_units, B_FALSE); - bytes_to_sizestr(MAXINTERLACE, &maxstr, universal_units, B_FALSE); - - volume_set_error( - gettext("interlace (%s) out of valid range (%s - %s)"), - intstr, minstr, maxstr); - - free(intstr); - free(minstr); - free(maxstr); - - return (-1); - } - - return (set_uint64(stripe->attributes, - ATTR_STRIPE_INTERLACE, interlace)); -} - -/* - * Get stripe interlace - * - * @param device - * a devconfig_t representing the device to examine - * - * @param interlace - * RETURN: stripe interlace - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_stripe_interlace( - devconfig_t *stripe, - uint64_t *interlace) -{ - int error = get_uint64( - stripe->attributes, ATTR_STRIPE_INTERLACE, interlace); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("stripe interlace not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the redundancy level for a volume. - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param rlevel - * If 0, a stripe will be created. If > 0, a mirror with - * this number of submirrors will be created. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_volume_redundancy_level( - devconfig_t *volume, - uint16_t rlevel) -{ - /* Validate against limits */ - if (rlevel > NMIRROR) { - volume_set_error(gettext( - "volume redundancy level (%d) out of valid range (%d-%d)"), - rlevel, 0, NMIRROR); - return (-1); - } - - return (set_uint16(volume->attributes, ATTR_VOLUME_REDUNDANCY, rlevel)); -} - -/* - * Get the redundancy level for a volume. - * - * @param device - * a devconfig_t representing the device to examine - * - * @param rlevel - * RETURN: the redundancy level for a volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_volume_redundancy_level( - devconfig_t *volume, - uint16_t *rlevel) -{ - int error = get_uint16( - volume->attributes, ATTR_VOLUME_REDUNDANCY, rlevel); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("volume redundancy level not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the number of paths in volume - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param npaths - * the value to set as the number of paths in volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_volume_npaths( - devconfig_t *volume, - uint16_t npaths) -{ - /* Validate against limits */ - if (npaths < MIN_NDATAPATHS || npaths > MAX_NDATAPATHS) { - volume_set_error( - gettext("number of data paths (%d) out of valid range (%d-%d)"), - npaths, MIN_NDATAPATHS, MAX_NDATAPATHS); - return (-1); - } - - return (set_uint16(volume->attributes, ATTR_VOLUME_DATAPATHS, npaths)); -} - -/* - * Get number of paths in volume - * - * @param device - * a devconfig_t representing the device to examine - * - * @param npaths - * RETURN: number of paths in volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_volume_npaths( - devconfig_t *volume, - uint16_t *npaths) -{ - int error = get_uint16( - volume->attributes, ATTR_VOLUME_DATAPATHS, npaths); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("number of data paths not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Set the HSP creation option (for volume, stripe, concat, mirror) - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param usehsp - * the value to set as the HSP creation option - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_set_volume_usehsp( - devconfig_t *volume, - boolean_t usehsp) -{ - return (set_boolean(volume->attributes, ATTR_VOLUME_USEHSP, usehsp)); -} - -/* - * Get HSP creation option (for volume, stripe, concat, mirror) - * - * @param device - * a devconfig_t representing the device to examine - * - * @param usehsp - * RETURN: HSP creation option (for volume, stripe, - * concat, mirror) - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -devconfig_get_volume_usehsp( - devconfig_t *volume, - boolean_t *usehsp) -{ - int error = get_boolean( - volume->attributes, ATTR_VOLUME_USEHSP, usehsp); - - /* Convert ENOENT to ERR_ATTR_UNSET for a custom error message */ - if (error == ENOENT) { - volume_set_error(gettext("volume usehsp not set")); - error = ERR_ATTR_UNSET; - } - - return (error); -} - -/* - * Get the string representation of the volume's type - * - * @param type - * a valid component_type_t - * - * @return an internationalized string representing the given - * type - */ -char * -devconfig_type_to_str( - component_type_t type) -{ - char *str; - - switch (type) { - case TYPE_CONCAT: str = gettext("Concat"); break; - case TYPE_CONTROLLER: str = gettext("Controller"); break; - case TYPE_DISKSET: str = gettext("Diskset"); break; - case TYPE_DRIVE: str = gettext("Disk"); break; - case TYPE_EXTENT: str = gettext("Extent"); break; - case TYPE_HOST: str = gettext("Host"); break; - case TYPE_HSP: str = gettext("Hot Spare Pool"); break; - case TYPE_MIRROR: str = gettext("Mirror"); break; - case TYPE_RAID5: str = gettext("Raid5"); break; - case TYPE_SLICE: str = gettext("Slice"); break; - case TYPE_SOFTPART: str = gettext("Soft Partition"); break; - case TYPE_STRIPE: str = gettext("Stripe"); break; - case TYPE_TRANS: str = gettext("Trans"); break; - case TYPE_VOLUME: str = gettext("Volume"); break; - default: - case TYPE_UNKNOWN: str = gettext("Unknown"); break; - } - - return (str); -} - -/* - * Get the string representation of the mirror's read strategy - * - * @param read - * a valid mirror_read_strategy_t - * - * @return an internationalized string representing the given - * read strategy - */ -char * -devconfig_read_strategy_to_str( - mirror_read_strategy_t read) -{ - char *str; - - switch (read) { - case MIRROR_READ_ROUNDROBIN: str = gettext("ROUNDROBIN"); break; - case MIRROR_READ_GEOMETRIC: str = gettext("GEOMETRIC"); break; - case MIRROR_READ_FIRST: str = gettext("FIRST"); break; - default: str = ""; - } - - return (str); -} - -/* - * Get the string representation of the mirror's write strategy - * - * @param write - * a valid mirror_write_strategy_t - * - * @return an internationalized string representing the given - * write strategy - */ -char * -devconfig_write_strategy_to_str( - mirror_write_strategy_t write) -{ - char *str; - - switch (write) { - case MIRROR_WRITE_PARALLEL: str = gettext("PARALLEL"); break; - case MIRROR_WRITE_SERIAL: str = gettext("SERIAL"); break; - default: str = ""; - } - - return (str); -} - -#ifdef DEBUG -/* - * Dump the contents of a devconfig_t struct to stdout. - * - * @param device - * the devconfig_t to examine - * - * @param prefix - * a prefix string to print before each line - */ -void -devconfig_dump( - devconfig_t *device, - char *prefix) -{ - dlist_t *comps = NULL; - char **array = NULL; - char *str = NULL; - int i = 0; - - component_type_t type = TYPE_UNKNOWN; - boolean_t bool = B_FALSE; - uint16_t val16 = 0; - uint64_t val64 = 0; - mirror_read_strategy_t read; - mirror_write_strategy_t write; - - if (device == NULL) { - return; - } - - /* Type */ - if (devconfig_get_type(device, &type) == 0) { - printf("%s%s\n", prefix, devconfig_type_to_str(type)); - } - - /* Name */ - if (devconfig_get_name(device, &str) == 0) { - printf("%s name: %s\n", prefix, str); - } - - /* Size in bytes */ - if (devconfig_get_size(device, &val64) == 0) { - printf("%s size in bytes: %llu\n", prefix, val64); - } - - /* Size in blocks */ - if (devconfig_get_size_in_blocks(device, &val64) == 0) { - printf("%s size in blocks: %llu\n", prefix, val64); - } - - /* Use HSP */ - if (devconfig_get_volume_usehsp(device, &bool) == 0) { - printf("%s usehsp: %s\n", prefix, bool? "TRUE" : "FALSE"); - } - - switch (type) { - case TYPE_VOLUME: - /* Volume rlevel */ - if (devconfig_get_volume_redundancy_level( - device, &val16) == 0) { - printf("%s volume redundancy level: %d\n", prefix, val16); - } - - /* Volume npaths */ - if (devconfig_get_volume_npaths(device, &val16) == 0) { - printf("%s volume npaths: %d\n", prefix, val16); - } - break; - - case TYPE_MIRROR: - - /* Mirror nsubs */ - if (devconfig_get_mirror_nsubs(device, &val16) == 0) { - printf("%s mirror nsubs: %d\n", prefix, val16); - } - - /* Mirror read */ - if (devconfig_get_mirror_read(device, &read) == 0) { - printf("%s mirror read: %s\n", prefix, - devconfig_read_strategy_to_str(read)); - } - - /* Mirror write */ - if (devconfig_get_mirror_write(device, &write) == 0) { - printf("%s mirror write: %s\n", prefix, - devconfig_write_strategy_to_str(write)); - } - - /* Mirror pass */ - if (devconfig_get_mirror_pass(device, &val16) == 0) { - printf("%s mirror pass: %d\n", prefix, val16); - } - break; - - case TYPE_STRIPE: - /* Stripe mincomp */ - if (devconfig_get_stripe_mincomp(device, &val16) == 0) { - printf("%s stripe mincomp: %d\n", prefix, val16); - } - - /* Stripe maxcomp */ - if (devconfig_get_stripe_maxcomp(device, &val16) == 0) { - printf("%s stripe maxcomp: %d\n", prefix, val16); - } - - /* Stripe interlace */ - if (devconfig_get_stripe_interlace(device, &val64) == 0) { - printf("%s stripe interlace: %lld\n", prefix, val64); - } - break; - - case TYPE_SLICE: - /* Slice index */ - if (devconfig_get_slice_index(device, &val16) == 0) { - printf("%s slice index: %d\n", prefix, val16); - } - - /* Slice start block */ - if (devconfig_get_slice_start_block(device, &val64) == 0) { - printf("%s slice start block: %llu\n", prefix, val64); - } - break; - } - - array = devconfig_get_available(device); - if (array != NULL) { - printf("%s available:\n", prefix); - for (i = 0; array[i] != NULL; i++) { - printf("%s %s\n", prefix, array[i]); - } - } - - array = devconfig_get_unavailable(device); - if (array != NULL) { - printf("%s unavailable:\n", prefix); - for (i = 0; array[i] != NULL; i++) { - printf("%s %s\n", prefix, array[i]); - } - } - - printf("\n"); - - comps = devconfig_get_components(device); - if (comps != NULL) { - char buf[128]; - snprintf(buf, 128, "%s%s", prefix, " "); - for (; comps != NULL; comps = comps->next) { - devconfig_dump((devconfig_t *)comps->obj, buf); - } - } -} -#endif /* DEBUG */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_devconfig.h b/usr/src/cmd/lvm/metassist/common/volume_devconfig.h deleted file mode 100644 index 3898554591bd..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_devconfig.h +++ /dev/null @@ -1,998 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_DEVCONFIG_H -#define _VOLUME_DEVCONFIG_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include "volume_dlist.h" -#include - -/* - * String constants for XML element/attribute names. - */ -#define ELEMENT_AVAILABLE "available" -#define ELEMENT_COMMENT "comment" -#define ELEMENT_CONCAT "concat" -#define ELEMENT_DISK "disk" -#define ELEMENT_DISKSET "diskset" -#define ELEMENT_HSP "hsp" -#define ELEMENT_L10N "localization" -#define ELEMENT_MESSAGE "message" -#define ELEMENT_MIRROR "mirror" -#define ELEMENT_PARAM "param" -#define ELEMENT_SLICE "slice" -#define ELEMENT_STRIPE "stripe" -#define ELEMENT_TEXT "text" -#define ELEMENT_UNAVAILABLE "unavailable" -#define ELEMENT_VARIABLE "variable" -#define ELEMENT_VOLUME "volume" -#define ELEMENT_VOLUMECONFIG "volume-config" -#define ELEMENT_VOLUMEDEFAULTS "volume-defaults" -#define ELEMENT_VOLUMEREQUEST "volume-request" - -#define ATTR_LANG "xml:lang" -#define ATTR_MESSAGEID "msgid" -#define ATTR_MIRROR_NSUBMIRRORS "nsubmirrors" -#define ATTR_MIRROR_PASSNUM "passnum" -#define ATTR_MIRROR_READ "read" -#define ATTR_MIRROR_WRITE "write" -#define ATTR_NAME "name" -#define ATTR_SELECT "select" -#define ATTR_SIZEINBLOCKS "sizeinblocks" -#define ATTR_SIZEINBYTES "size" -#define ATTR_SLICE_INDEX "index" -#define ATTR_SLICE_STARTSECTOR "startsector" -#define ATTR_STRIPE_INTERLACE "interlace" -#define ATTR_STRIPE_MAXCOMP "maxcomp" -#define ATTR_STRIPE_MINCOMP "mincomp" -#define ATTR_TYPE "type" -#define ATTR_VOLUME_CREATE "create" -#define ATTR_VOLUME_DATAPATHS "datapaths" -#define ATTR_VOLUME_FAULTRECOVERY "faultrecovery" -#define ATTR_VOLUME_REDUNDANCY "redundancy" -#define ATTR_VOLUME_USEHSP "usehsp" - -#define NAME_L10N_MESSAGE_FILE "msgfile" -#define NAME_LANG "lang" - -/* - * Limits for attributes - */ -#define MIN_NSTRIPE_COMP 1 -#define MIN_SIZE 0 -#define MIN_SIZE_IN_BLOCKS 0 -#define MIN_NDATAPATHS 1 -#define MAX_NDATAPATHS 4 - -/* Attribute requested but not set */ -#define ERR_ATTR_UNSET -10001 - -/* - * Enumeration defining physical or logical device types - */ -typedef enum { - TYPE_UNKNOWN = 0, - TYPE_CONCAT = 1, - TYPE_CONTROLLER, - TYPE_DISKSET, - TYPE_DRIVE, - TYPE_EXTENT, - TYPE_HOST, - TYPE_HSP, - TYPE_MIRROR, - TYPE_RAID5, - TYPE_SLICE, - TYPE_SOFTPART, - TYPE_STRIPE, - TYPE_TRANS, - TYPE_VOLUME -} component_type_t; - -/* - * enumerated constants for SVM Mirror read strategies - */ -typedef enum { - MIRROR_READ_ROUNDROBIN = 0, - MIRROR_READ_GEOMETRIC, - MIRROR_READ_FIRST -} mirror_read_strategy_t; - -/* - * enumerated constants for SVM Mirror write strategies - */ -typedef enum { - MIRROR_WRITE_PARALLEL = 0, - MIRROR_WRITE_SERIAL -} mirror_write_strategy_t; - -/* - * devconfig_t - struct to hold a device configuration hierarchy - */ -typedef struct devconfig { - - /* Attributes of this device */ - nvlist_t *attributes; - - /* - * Available devices for use in construction of this device - * and its subcomponents - */ - char **available; - - /* - * Unavailable devices for use in construction of this device - * and its subcomponents - */ - char **unavailable; - - /* - * Subcomponents (devconfig_t) of this device - */ - dlist_t *components; -} devconfig_t; - -/* - * Function prototypes - */ - -/* - * Constructor: Create a devconfig_t struct. This devconfig_t must be - * freed with free_devconfig(). - * - * @param devconfig - * RETURN: a new devconfig_t - * - * @param type - * the type of devconfig_t to create - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int new_devconfig(devconfig_t **devconfig, component_type_t type); - -/* - * Free memory (recursively) allocated to a devconfig_t struct - * - * @param arg - * pointer to the devconfig_t to be freed - */ -extern void free_devconfig(void *arg); - -/* - * Check the type of the given device. - * - * @param device - * the device whose type to check - * - * @param type - * the type of the device against which to compare - * - * @return B_TRUE if the device is of the given type, B_FALSE - * otherwise - */ -extern boolean_t devconfig_isA(devconfig_t *device, component_type_t type); - -/* - * Get the first component of the given type from the given - * devconfig_t. Create the component if create is B_TRUE. - * - * @return ENOENT - * if the requested component does not exist and its - * creation was not requested - * - * @return 0 - * if the requested component exists or was created - * - * @return non-zero - * if the requested component did not exist and could not - * be created - */ -extern int devconfig_get_component(devconfig_t *device, - component_type_t type, devconfig_t **component, boolean_t create); - -/* - * Set the available devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to modify - * - * @param available - * A NULL-terminated array of device names - */ -extern void devconfig_set_available(devconfig_t *device, char **available); - -/* - * Get the available devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return available - * A NULL-terminated array of device names - */ -extern char ** devconfig_get_available(devconfig_t *device); - -/* - * Set the unavailable devices which may not be used in creating this - * device - * - * @param device - * a devconfig_t representing the device to modify - * - * @param available - * A NULL-terminated array of device names - */ -extern void devconfig_set_unavailable(devconfig_t *device, char **unavailable); - -/* - * Get the unavailable devices for use in creating this device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return unavailable - * A NULL-terminated array of device names - */ -extern char ** devconfig_get_unavailable(devconfig_t *device); - -/* - * Set the subcomponent devices of a given device - * - * @param device - * a devconfig_t representing the device to examine - * - * @param components - * A dlist_t containing devconfig_t devices - */ -extern void devconfig_set_components(devconfig_t *device, dlist_t *components); - -/* - * Get the subcomponent devices of a given device - * - * @param device - * a devconfig_t representing the device to examine - * - * @return A dlist_t containing devconfig_t devices - */ -extern dlist_t *devconfig_get_components(devconfig_t *device); - -/* - * Set the device name - * - * @param device - * a devconfig_t representing the device to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_name(devconfig_t *device, char *name); - -/* - * Set the disk set name - * - * @param diskset - * a devconfig_t representing the diskset to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_diskset_name(devconfig_t *diskset, char *name); - -/* - * Set the device name - * - * @param hsp - * a devconfig_t representing the hsp to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_hsp_name(devconfig_t *hsp, char *name); - -/* - * Set the device name - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param name - * the value to set as the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_volume_name(devconfig_t *volume, char *name); - -/* - * Get the device name - * - * @param volume - * a devconfig_t representing the volume to examine - * - * @param name - * RETURN: the device name - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_name(devconfig_t *device, char **name); - -/* - * Set the device type - * - * @param device - * a devconfig_t representing the device to modify - * - * @param type - * the value to set as the device type - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_type(devconfig_t *device, component_type_t type); - -/* - * Get the device type - * - * @param device - * a devconfig_t representing the device to examine - * - * @param type - * RETURN: the device type - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_type(devconfig_t *device, component_type_t *type); - -/* - * Set the device size (for volume, mirror, stripe, concat) in bytes - * - * Note that size in bytes in a 64-bit field cannot hold the size that - * can be accessed in a 16 byte CDB. Since CDBs operate on blocks, - * the max capacity is 2^73 bytes with 512 byte blocks. - * - * @param device - * a devconfig_t representing the device to modify - * - * @param size_in_bytes - * the value to set as the device size in bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_size(devconfig_t *device, uint64_t size_in_bytes); - -/* - * Get the device size (for volume, mirror, stripe, concat) in bytes - * - * Note that size in bytes in a 64-bit field cannot hold the size that - * can be accessed in a 16 byte CDB. Since CDBs operate on blocks, - * the max capacity is 2^73 bytes with 512 byte blocks. - * - * @param device - * a devconfig_t representing the device to examine - * - * @param size_in_bytes - * RETURN: the device size in bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_size(devconfig_t *device, uint64_t *size_in_bytes); - -/* - * Set the device size in blocks - * - * @param device - * a devconfig_t representing the device to modify - * - * @param type - * the value to set as the device size in blocks - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_size_in_blocks( - devconfig_t *device, uint64_t size_in_blocks); - -/* - * Get the device size in blocks - * - * @param device - * a devconfig_t representing the device to examine - * - * @param size_in_blocks - * RETURN: the device size in blocks - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_size_in_blocks( - devconfig_t *device, uint64_t *size_in_blocks); - -/* - * Set the the slice index - * - * @param slice - * a devconfig_t representing the slice to modify - * - * @param index - * the value to set as the the slice index - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_slice_index(devconfig_t *slice, uint16_t index); - -/* - * Get the slice index - * - * @param device - * a devconfig_t representing the device to examine - * - * @param index - * RETURN: the slice index - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_slice_index(devconfig_t *slice, uint16_t *index); - -/* - * Set the the slice start block - * - * @param slice - * a devconfig_t representing the slice to modify - * - * @param start_block - * the value to set as the the slice start block - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_slice_start_block( - devconfig_t *slice, uint64_t start_block); - -/* - * Get the slice start block - * - * @param device - * a devconfig_t representing the device to examine - * - * @param start_block - * RETURN: the slice start block - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_slice_start_block( - devconfig_t *slice, uint64_t *start_block); - -/* - * Set the number of subcomponents in mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param nsubs - * the value to set as the number of subcomponents in - * mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_mirror_nsubs(devconfig_t *mirror, uint16_t nsubs); - -/* - * Get number of subcomponents in mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param nsubs - * RETURN: number of subcomponents in mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_mirror_nsubs(devconfig_t *mirror, uint16_t *nsubs); - -/* - * Set the read strategy for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param read - * the value to set as the read strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_mirror_read( - devconfig_t *mirror, mirror_read_strategy_t read); - -/* - * Get read strategy for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param read - * RETURN: read strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_mirror_read( - devconfig_t *mirror, mirror_read_strategy_t *read); - -/* - * Set the write strategy for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param write - * the value to set as the write strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_mirror_write( - devconfig_t *mirror, mirror_write_strategy_t write); - -/* - * Get write strategy for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param write - * RETURN: write strategy for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_mirror_write( - devconfig_t *mirror, mirror_write_strategy_t *write); - -/* - * Set the resync pass for mirror - * - * @param mirror - * a devconfig_t representing the mirror to modify - * - * @param pass - * the value to set as the resync pass for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_mirror_pass(devconfig_t *mirror, uint16_t pass); - -/* - * Get resync pass for mirror - * - * @param device - * a devconfig_t representing the device to examine - * - * @param pass - * RETURN: resync pass for mirror - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_mirror_pass(devconfig_t *mirror, uint16_t *pass); - -/* - * Set the minimum number of components in stripe - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param mincomp - * the value to set as the minimum number of components - * in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_stripe_mincomp(devconfig_t *stripe, uint16_t mincomp); - -/* - * Get minimum number of components in stripe - * - * @param device - * a devconfig_t representing the device to examine - * - * @param mincomp - * RETURN: minimum number of components in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_stripe_mincomp(devconfig_t *stripe, uint16_t *mincomp); - -/* - * Set the maximum number of components in stripe - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param maxcomp - * the value to set as the maximum number of components - * in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_stripe_maxcomp(devconfig_t *stripe, uint16_t maxcomp); - -/* - * Get maximum number of components in stripe - * - * @param device - * a devconfig_t representing the device to examine - * - * @param maxcomp - * RETURN: maximum number of components in stripe - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_stripe_maxcomp(devconfig_t *stripe, uint16_t *maxcomp); - -/* - * Set the stripe interlace - * - * @param stripe - * a devconfig_t representing the stripe to modify - * - * @param interlace - * the value to set as the stripe interlace - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_stripe_interlace( - devconfig_t *stripe, uint64_t interlace); - -/* - * Get stripe interlace - * - * @param device - * a devconfig_t representing the device to examine - * - * @param interlace - * RETURN: stripe interlace - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_stripe_interlace( - devconfig_t *stripe, uint64_t *interlace); - -/* - * Set the redundancy level for a volume. - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param rlevel - * If 0, a stripe will be created. If > 0, a mirror with - * this number of submirrors will be created. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_volume_redundancy_level( - devconfig_t *volume, uint16_t rlevel); - -/* - * Get the redundancy level for a volume. - * - * @param device - * a devconfig_t representing the device to examine - * - * @param rlevel - * RETURN: the redundancy level for a volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_volume_redundancy_level( - devconfig_t *volume, uint16_t *rlevel); - -/* - * Set the number of paths in volume - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param npaths - * the value to set as the number of paths in volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_volume_npaths(devconfig_t *volume, uint16_t npaths); - -/* - * Get number of paths in volume - * - * @param device - * a devconfig_t representing the device to examine - * - * @param npaths - * RETURN: number of paths in volume - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_volume_npaths(devconfig_t *volume, uint16_t *npaths); - -/* - * Set the HSP creation option (for volume, stripe, concat, mirror) - * - * @param volume - * a devconfig_t representing the volume to modify - * - * @param usehsp - * the value to set as the HSP creation option - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_set_volume_usehsp(devconfig_t *volume, boolean_t usehsp); - -/* - * Get HSP creation option (for volume, stripe, concat, mirror) - * - * @param device - * a devconfig_t representing the device to examine - * - * @param usehsp - * RETURN: HSP creation option (for volume, stripe, - * concat, mirror) - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int devconfig_get_volume_usehsp(devconfig_t *volume, boolean_t *usehsp); - -/* - * Get the string representation of the volume's type - * - * @param type - * a valid component_type_t - * - * @return an internationalized string representing the given - * type - */ -extern char *devconfig_type_to_str(component_type_t type); - -/* - * Get the string representation of the mirror's read strategy - * - * @param read - * a valid mirror_read_strategy_t - * - * @return an internationalized string representing the given - * read strategy - */ -extern char *devconfig_read_strategy_to_str(mirror_read_strategy_t read); - -/* - * Get the string representation of the mirror's write strategy - * - * @param write - * a valid mirror_write_strategy_t - * - * @return an internationalized string representing the given - * write strategy - */ -extern char *devconfig_write_strategy_to_str(mirror_write_strategy_t write); - -#ifdef DEBUG -/* - * Dump the contents of a devconfig_t struct to stdout. - * - * @param device - * the devconfig_t to examine - * - * @param prefix - * a prefix string to print before each line - */ -extern void devconfig_dump(devconfig_t *device, char *prefix); -#endif /* DEBUG */ - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_DEVCONFIG_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_dlist.c b/usr/src/cmd/lvm/metassist/common/volume_dlist.c deleted file mode 100644 index f836c2bc970b..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_dlist.c +++ /dev/null @@ -1,512 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include - -#include "volume_dlist.h" - -#define _volume_dlist_C - -/* - * public constant definitions - */ -const boolean_t ASCENDING = TRUE; /* list ordering */ -const boolean_t DESCENDING = FALSE; -const boolean_t AT_TAIL = TRUE; /* list insertion location */ -const boolean_t AT_HEAD = FALSE; - -/* - * determine if the list contains an item - * that points at the object - */ -boolean_t -dlist_contains( - dlist_t *list, - void *obj, - int (compare)(void *, void *)) -{ - return (dlist_find(list, obj, compare) != NULL); -} - -/* - * locate the item in the list that points at the object - */ -dlist_t * -dlist_find( - dlist_t *list, - void *obj, - int (compare)(void *, void *)) -{ - dlist_t *iter; - - for (iter = list; iter != NULL; iter = iter->next) { - if ((compare)(obj, iter->obj) == 0) { - return (iter); - } - } - - return (NULL); -} - -/* - * insert item into list in the desired order (ascending or descending) - * using the comparison function provided. - * - * In the for loop, iter marks current position in the list - * and item is the item to be inserted. - * - * Iterate the list and find the correct place to insert temp. - * - * if (ascending && compare(item, iter) <= 0 || - * (descending && compare(item, iter) >= 0) - * item goes before iter - * else - * item goes after iter - */ -dlist_t * -dlist_insert_ordered( - dlist_t *item, - dlist_t *list, - boolean_t ascending, - int (compare)(void *, void *)) -{ - dlist_t *head = NULL; - dlist_t *iter = NULL; - int result = 0; - - if (list == NULL) { - - head = item; - - } else { - - head = list; - - for (iter = list; iter != NULL; iter = iter->next) { - - result = (compare)(item->obj, iter->obj); - - if ((ascending && (result <= 0)) || - (!ascending && (result >= 0))) { - - if (iter == head) { - head = item; - item->next = iter; - iter->prev = item; - } else { - item->prev = iter->prev; - item->prev->next = item; - iter->prev = item; - item->next = iter; - } - break; - } - - if (iter->next == NULL) { - /* end of list, so item becomes the new end */ - iter->next = item; - item->prev = iter; - break; - } - } - } - - return (head); -} - -/* - * Remove the first node pointing to same content as item from list, - * clear it's next and prev pointers, return new list head. - * - * The caller is responsible for freeing the removed item if it is no - * longer needed. - * - * The comparison function should be of the form: - * - * int compare(void *obj1, void* obj2); - * - * When called, obj1 will be the object passed into - * dlist_remove_equivalent_item and obj2 will be an object pointed to - * by an item in the list. - * - * The function should return 0 if the two objects are equivalent The - * function should return nonzero otherwise - * - * @param list - * the list containing the item to remove - * - * @param obj - * the object with which to compare each item - * - * @param compare - * the comparison function, passed obj and the obj member - * of each item, to return 0 if item should be removed - * - * @param removed - * RETURN: the removed item, or NULL if none was found - * - * @return the first element of the resulting list - */ -dlist_t * -dlist_remove_equivalent_item( - dlist_t *list, - void *obj, - int (compare)(void *, void *), - dlist_t **removed) -{ - dlist_t *item; - - *removed = NULL; - - if (list == NULL) { - return (list); - } - - item = dlist_find(list, obj, compare); - if (item == NULL) { - return (list); - } - - *removed = item; - - return (dlist_remove(item)); -} - -/* - * Remove an item from its list. Return the resulting list. - * - * @param item - * the item to remove, with prev and next pointers - * set to NULL - * - * @return the first element of the resulting list - */ -dlist_t * -dlist_remove( - dlist_t *item) -{ - dlist_t *head = NULL; - - if (item != NULL) { - if (item->next != NULL) { - item->next->prev = item->prev; - head = item->next; - } - - if (item->prev != NULL) { - item->prev->next = item->next; - head = item->prev; - } - - item->next = NULL; - item->prev = NULL; - - /* Find head of list */ - for (; head != NULL && head->prev != NULL; head = head->prev); - } - - return (head); -} - -/* - * append item to list, either beginning or end - */ -dlist_t * -dlist_append( - dlist_t *item, - dlist_t *list, - boolean_t attail) -{ - dlist_t *head = list; - - if (list == NULL) { - - head = item; - - } else if (item == NULL) { - - head = list; - - } else if (attail) { - - dlist_t *iter; - - /* append to end */ - for (iter = head; iter->next != NULL; iter = iter->next); - - iter->next = item; - item->prev = iter; - - } else { - /* insert at begining */ - item->next = head; - head->prev = item; - head = item; - } - - return (head); -} - -/* - * Create a dlist_t element for the given object and append to list. - * - * @param object - * the obj member of the dlist_t element to be created - * - * @param list - * the list to which to append the new dlist_t element - * - * @param attail - * whether to append at the beginning (AT_HEAD) or end - * (AT_TAIL) of the list - * - * @return 0 - * if successful - * - * @return ENOMEM - * if a dlist_t could not be allocated - */ -int -dlist_append_object( - void *object, - dlist_t **list, - boolean_t attail) -{ - dlist_t *item = dlist_new_item(object); - - if (item == NULL) { - return (ENOMEM); - } - - *list = dlist_append(item, *list, attail); - - return (0); -} - -/* - * Appends list2 to the end of list1. - * - * Returns the resulting list. - */ -dlist_t * -dlist_append_list( - dlist_t *list1, - dlist_t *list2) -{ - dlist_t *iter; - - if (list1 == NULL) { - return (list2); - } - - if (list2 != NULL) { - /* Find last element of list1 */ - for (iter = list1; iter->next != NULL; iter = iter->next); - - iter->next = list2; - list2->prev = iter; - } - - return (list1); -} - -/* - * compute number of items in list - */ -int -dlist_length( - dlist_t *list) -{ - dlist_t *iter; - int length = 0; - - for (iter = list; iter != NULL; iter = iter->next) - ++length; - - return (length); -} - -/* - * Allocate a new dlist_t structure and initialize the opaque object - * pointer the input object. - * - * @return A new dlist_t structure for the given object, or NULL - * if the memory could not be allocated. - */ -dlist_t * -dlist_new_item( - void *obj) -{ - dlist_t *item = (dlist_t *)calloc(1, sizeof (dlist_t)); - - if (item != NULL) { - item->obj = obj; - } - - return (item); -} - -/* - * Traverse the list pointed to by head and free each - * list node. If freefunc is non-NULL, call freefunc - * for each node's object. - */ -void -dlist_free_items( - dlist_t *head, - void (freefunc(void *))) -{ - while (head != NULL) { - dlist_t *item = head; - head = head->next; - - if (freefunc != NULL) { - freefunc(item->obj); - } - - free((void *) item); - } -} - -/* - * Order the given list such that the number of similar elements - * adjacent to each other are minimized. - * - * The algorithm is: - * - * 1. Sort similar items into categories. Two elements are considered - * similar if the given compare function returns 0. - * - * 2. Create a new list by iterating through each category and - * selecting an element from the category with the most elements. - * Avoid choosing an element from the last category chosen. - * - * @param list - * the list to order - * - * @param compare - * the comparison function, passed the obj members - * of two items, to return 0 if the items can be - * considered similar - * - * @return the first element of the resulting list - */ -dlist_t * -dlist_separate_similar_elements( - dlist_t *list, - int(compare)(void *, void *)) -{ - dlist_t **categories = NULL; - dlist_t *item; - int ncategories = 0; - int max_elements; - int lastcat; - - /* - * First, sort like items into categories, according to - * the passed-in compare function - */ - for (item = list; item != NULL; ) { - dlist_t *removed; - - /* Remove this item from the list */ - list = dlist_remove(item); - - /* Create new category */ - categories = (dlist_t **)realloc( - categories, ++ncategories * sizeof (dlist_t *)); - categories[ncategories - 1] = item; - - /* Add like items to same category */ - list = dlist_remove_equivalent_item( - list, item->obj, compare, &removed); - while (removed != NULL) { - /* Add removed item to category */ - dlist_append(removed, item, AT_TAIL); - list = dlist_remove_equivalent_item( - list, item->obj, compare, &removed); - } - - item = list; - } - - /* - * Next, create a new list, minimizing the number of adjacent - * elements from the same category - */ - list = NULL; - lastcat = -1; - do { - int i; - int curcat; - - /* - * Find the category with the most elements, other than - * the last category chosen - */ - max_elements = 0; - for (i = 0; i < ncategories; i++) { - int nelements; - - if (i == lastcat) { - continue; - } - - nelements = dlist_length(categories[i]); - if (nelements > max_elements) { - max_elements = nelements; - curcat = i; - } - } - - /* If no elements were found, use the last category chosen */ - if (max_elements == 0 && lastcat >= 0) { - max_elements = dlist_length(categories[lastcat]); - curcat = lastcat; - } - - /* Was a category with elements found? */ - if (max_elements != 0) { - /* Remove first element of chosen category */ - item = categories[curcat]; - categories[curcat] = dlist_remove(item); - - /* Add removed element to resulting list */ - list = dlist_append(item, list, AT_TAIL); - - lastcat = curcat; - } - } while (max_elements != 0); - - free(categories); - - return (list); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_dlist.h b/usr/src/cmd/lvm/metassist/common/volume_dlist.h deleted file mode 100644 index af0c5786cc70..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_dlist.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_DLIST_H -#define _VOLUME_DLIST_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * Structure defining a doubly linked list of arbitrary objects - */ -typedef struct dlist { - - struct dlist *next; - struct dlist *prev; - void *obj; - -} dlist_t; - -/* - * module globals - */ -extern const boolean_t ASCENDING; -extern const boolean_t DESCENDING; -extern const boolean_t AT_TAIL; -extern const boolean_t AT_HEAD; - -/* from types.h */ -#ifndef TRUE -#define TRUE B_TRUE -#endif - -#ifndef FALSE -#define FALSE B_FALSE -#endif - -/* - * doubly linked list utility methods - */ - -/* - * count the number of elements currently in the list - */ -extern int dlist_length(dlist_t *list); - -/* - * Traverse the list pointed to by head and free each - * list node. If freefunc is non-NULL, call freefunc - * for each node's object. - */ -extern void dlist_free_items(dlist_t *list, void (freefunc(void *))); - -/* - * append item to list. If atend is true, the item is - * added at the end of the list, otherwise it is added - * at the beginning. - * - * returns the possibly changed head of the list. - */ -extern dlist_t *dlist_append( - dlist_t *item, - dlist_t *list, - boolean_t atend); - -/* - * Create a dlist_t element for the given object and append to list. - * - * @param object - * the obj member of the dlist_t element to be created - * - * @param list - * the list to which to append the new dlist_t element - * - * @param attail - * whether to append at the beginning (AT_HEAD) or end - * (AT_TAIL) of the list - * - * @return 0 - * if successful - * - * @return ENOMEM - * if a dlist_t could not be allocated - */ -extern int dlist_append_object( - void *object, - dlist_t **list, - boolean_t attail); - -/* - * Appends list2 to the end of list1. - * - * Returns the resulting list. - */ -extern dlist_t *dlist_append_list( - dlist_t *list1, - dlist_t *list2); - -/* - * Remove the first node pointing to same content as item from list, - * clear it's next and prev pointers, return new list head. - * - * The caller is responsible for freeing the removed item if it is no - * longer needed. - * - * The comparison function should be of the form: - * - * int compare(void *obj1, void* obj2); - * - * When called, obj1 will be the object passed into - * dlist_remove_equivalent_item and obj2 will be an object pointed to by an - * item in the list. - * - * The function should return 0 if the two objects are equivalent The - * function should return nonzero otherwise - * - * @param list - * the list containing the item to remove - * - * @param obj - * the object with which to compare each item - * - * @param compare - * the comparison function, passed obj and the obj member - * of each item, to return 0 if item should be removed - * - * @param removed - * RETURN: the removed item, or NULL if none was found - * - * @return the first element of the resulting list - */ -extern dlist_t *dlist_remove_equivalent_item( - dlist_t *list, - void *obj, - int (compare)(void *obj1, void *obj2), - dlist_t **removed); - -/* - * Remove an item from its list. Return the resulting list. - * - * @param item - * the item to remove, with prev and next pointers - * set to NULL - * - * @return the first element of the resulting list - */ -dlist_t * -dlist_remove( - dlist_t *item); - -/* - * allocates memory for a new list item. The list item will - * point at obj. - * - * returns the new list item. - */ -extern dlist_t *dlist_new_item(void *obj); - -/* - * inserts item in the correct position within the list based on - * the comparison function. if ascending is true, the list will - * be in ascending order, otherwise descending. - * - * the comparison function should be of the form: - * - * int compare(void *obj1, void *obj2); - * - * When called, obj1 will be the object pointed to by the item to - * be added to the list, obj2 will be an object pointed to by an - * item currently in the list. - * - * The function should return 0 if the two objects are equivalent - * The function should return <0 if obj1 comes before obj2 - * The function should return >0 if obj1 comes after obj2 - * - * dlist_insert_ordered returns the possibly changed head - * of the list. - */ -extern dlist_t *dlist_insert_ordered( - dlist_t *item, - dlist_t *list, - boolean_t ascending, - int (compare)(void *obj1, void *obj2)); - -/* - * Locates the item in the list which contains object. - * - * the comparison function should be of the form: - * - * int compare(void *obj1, void *obj2); - * - * When called, obj1 will be the input object, obj2 will be - * an object pointed to by an item currently in the list. - * - * The function should return 0 if the two objects are equivalent - * The function should return non-zero otherwise - * - * dlist_find() returns the found item or NULL if one was not found. - */ -extern dlist_t *dlist_find( - dlist_t *list, - void *obj, - int (compare)(void *obj1, void *obj2)); - -/* - * Determines if list has an item which contains object. - * - * the comparison function should be of the form: - * - * int compare(void *obj1, void *obj2); - * - * When called, obj1 will be the input object, obj2 will be - * an object pointed to by an item currently in the list. - * - * The function should return 0 if the two objects are equivalent - * The function should return non-zero otherwise - * - * dlist_contains() returns TRUE if the object is already - * in the list or FALSE otherwise. - */ -extern boolean_t dlist_contains( - dlist_t *list, - void *obj, - int (compare)(void *obj1, void *obj2)); - -/* - * Order the given list such that the number of similar elements - * adjacent to each other are minimized. Two elements are considered - * similar if the given compare function returns 0. - * - * @param list - * the list to order - * - * @param compare - * the comparison function, passed the obj members - * of two items, to return 0 if the items can be - * considered similar - * - * @return the first element of the resulting list - */ -extern dlist_t * -dlist_separate_similar_elements( - dlist_t *list, - int(*equals)(void *, void *)); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_DLIST_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_error.c b/usr/src/cmd/lvm/metassist/common/volume_error.c deleted file mode 100644 index 67c2927acee5..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_error.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include - -#include "volume_error.h" - -#define VOLUME_ERROR_BUFSIZE 1024 -static char volume_error[VOLUME_ERROR_BUFSIZE]; - -/* - * Retrieve the error string for the given error code. - * - * @param error - * If error is less than zero, it is assumed to be a - * custom error code. If error is greater than zero, it - * is assumed to be an error defined in errno.h. - * - * @return the error string set by volume_set_error() - * if error < 0 - * - * @return the error string returned by strerror() - * if error > 0 - */ -char * -get_error_string( - int error) -{ - if (error < 0) { - return (volume_error); - } - - if (error > 0) { - return (strerror(error)); - } - - return (NULL); -} - -/* - * Set the error string for the most recent error. This message can - * be retrieved with get_error_string(error), assuming error is less - * than zero. - * - * @param fmt - * printf format string - * - * @return the number of characters formatted - * if successful - * - * @return negative value - * if an error occurred - */ -/*PRINTFLIKE1*/ -int -volume_set_error( - char *fmt, - ...) -{ - int ret = 0; - - va_list ap; - va_start(ap, fmt); - ret = vsnprintf(volume_error, VOLUME_ERROR_BUFSIZE, fmt, ap); - va_end(ap); - - return (ret); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_error.h b/usr/src/cmd/lvm/metassist/common/volume_error.h deleted file mode 100644 index 30fa08acc6d3..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_error.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_ERROR_H -#define _VOLUME_ERROR_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Retrieve the error string for the given error code. - * - * @param error - * If error is less than zero, it is assumed to be a - * custom error code. If error is greater than zero, it - * is assumed to be an error defined in errno.h. - * - * @return the error string set by volume_set_error() - * if error < 0 - * - * @return the error string returned by strerror() - * if error > 0 - */ -extern char *get_error_string(int error); - -/* - * Set the error string for the most recent error. This message can - * be retrieved with get_error_string(error), assuming error is less - * than zero. - * - * @param fmt - * printf format string - * - * @return the number of characters formatted - * if successful - * - * @return negative value - * if an error occurred - */ -extern int volume_set_error(char *fmt, ...); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_ERROR_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_nvpair.c b/usr/src/cmd/lvm/metassist/common/volume_nvpair.c deleted file mode 100644 index 57592d8880ae..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_nvpair.c +++ /dev/null @@ -1,726 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include "volume_nvpair.h" -#include "volume_error.h" - -/* - * ****************************************************************** - * - * Function prototypes - * - * ****************************************************************** - */ - -static nvpair_t *nvlist_walk_nvpair(nvlist_t *nvl, - const char *name, data_type_t type, nvpair_t *nvp); - -/* - * ****************************************************************** - * - * External functions - * - * ****************************************************************** - */ - -/* - * Get the named uint16 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint16 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_uint16( - nvlist_t *attrs, - char *which, - uint16_t *val) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_UINT16, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_uint16(match, val); - } - - return (error); -} - -/* - * Set the named uint16 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_uint16( - nvlist_t *attrs, - char *which, - uint16_t val) -{ - int error = 0; - - if ((error = nvlist_add_uint16(attrs, which, val)) != 0) { - volume_set_error( - gettext("nvlist_add_int16(%s) failed: %d\n"), which, error); - } - - return (error); -} - -/* - * Get the named uint32 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint32 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_uint32( - nvlist_t *attrs, - char *which, - uint32_t *val) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_UINT32, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_uint32(match, val); - } - - return (error); -} - -/* - * Set the named uint32 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_uint32( - nvlist_t *attrs, - char *which, - uint32_t val) -{ - int error = 0; - - if ((error = nvlist_add_uint32(attrs, which, val)) != 0) { - volume_set_error( - gettext("nvlist_add_int32(%s) failed: %d\n"), which, error); - } - - return (error); -} - -/* - * Get the named uint64 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint64 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_uint64( - nvlist_t *attrs, - char *which, - uint64_t *val) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_UINT64, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_uint64(match, val); - } - - return (error); -} - -/* - * Set the named uint64 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_uint64( - nvlist_t *attrs, - char *which, - uint64_t val) -{ - int error = 0; - - if ((error = nvlist_add_uint64(attrs, which, val)) != 0) { - volume_set_error( - gettext("nvlist_add_int64(%s) failed: %d\n"), which, error); - } - - return (error); -} - -/* - * Set the named boolean in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_boolean( - nvlist_t *attrs, - char *which, - boolean_t val) -{ - /* - * Use set_uint16 to distinguish "attr = B_FALSE" from - * "attribute unset". - */ - return (set_uint16(attrs, which, val == B_TRUE ? 1 : 0)); -} - -/* - * Get the named boolean from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param boolval - * RETURN: the value of the requested boolean - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_boolean( - nvlist_t *attrs, - char *which, - boolean_t *boolval) -{ - int error; - uint16_t val; - - /* - * Use get_uint16 to distinguish "attr = B_FALSE" from - * "attribute unset". - */ - if ((error = get_uint16(attrs, which, &val)) == 0) { - *boolval = (val ? B_TRUE : B_FALSE); - } - - return (error); -} - -/* - * Get the named string from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param str - * RETURN: the requested string - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_string( - nvlist_t *attrs, - char *which, - char **str) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_STRING, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_string(match, str); - } - - return (error); -} - -/* - * Set the named string in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_string( - nvlist_t *attrs, - char *which, - char *val) -{ - int error = 0; - - if ((error = nvlist_add_string(attrs, which, val)) != 0) { - volume_set_error( - gettext("nvlist_add_string(%s) failed: %d\n"), which, error); - } - - return (error); -} - -/* - * Get the named uint16 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint16 array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_uint16_array( - nvlist_t *attrs, - char *which, - uint16_t **val, - uint_t *nelem) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_UINT16_ARRAY, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_uint16_array(match, val, nelem); - } - - return (error); -} - -/* - * Set the named uint16 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested uint16 array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_uint16_array( - nvlist_t *attrs, - char *which, - uint16_t *val, - uint_t nelem) -{ - int error = 0; - - if ((error = nvlist_add_uint16_array( - attrs, which, val, nelem)) != 0) { - volume_set_error( - gettext("nvlist_add_uint16_array(%s) failed: %d.\n"), - which, error); - } - - return (error); -} - -/* - * Get the named uint64 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint64 array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_uint64_array( - nvlist_t *attrs, - char *which, - uint64_t **val, - uint_t *nelem) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_UINT64_ARRAY, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_uint64_array(match, val, nelem); - } - - return (error); -} - -/* - * Set the named uint64 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested uint64 array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_uint64_array( - nvlist_t *attrs, - char *which, - uint64_t *val, - uint_t nelem) -{ - int error = 0; - - if ((error = nvlist_add_uint64_array( - attrs, which, val, nelem)) != 0) { - volume_set_error( - gettext("nvlist_add_uint64_array(%s) failed: %d.\n"), - which, error); - } - - return (error); -} - -/* - * Get the named string array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested string array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - */ -int -get_string_array( - nvlist_t *attrs, - char *which, - char ***val, - uint_t *nelem) -{ - int error; - nvpair_t *match = - nvlist_walk_nvpair(attrs, which, DATA_TYPE_STRING_ARRAY, NULL); - - if (match == NULL) { - error = ENOENT; - } else { - error = nvpair_value_string_array(match, val, nelem); - } - - return (error); -} - -/* - * Set the named string array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested string array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -int -set_string_array( - nvlist_t *attrs, - char *which, - char **val, - uint_t nelem) -{ - int error = 0; - - if ((error = nvlist_add_string_array( - attrs, which, val, nelem)) != 0) { - volume_set_error( - gettext("nvlist_add_string_array(%s) failed: %d.\n"), - which, error); - } - - return (error); -} - -/* - * ****************************************************************** - * - * Static functions - * - * ****************************************************************** - */ - -/* - * Get a handle to the next nvpair with the specified name and data - * type in the list following the given nvpair. - * - * Some variation of this function will likely appear in the libnvpair - * library per 4981923. - * - * @param nvl - * the nvlist_t to search - * - * @param name - * the string key for the pair to find in the list, or - * NULL to match any name - * - * @param type - * the data type for the pair to find in the list, or - * DATA_TYPE_UNKNOWN to match any type - * - * @param nvp - * the pair to search from in the list, or NULL to search - * from the beginning of the list - * - * @return the next nvpair in the list matching the given - * criteria, or NULL if no matching nvpair is found - */ -static nvpair_t * -nvlist_walk_nvpair( - nvlist_t *nvl, - const char *name, - data_type_t type, - nvpair_t *nvp) -{ - /* For each nvpair in the list following nvp... */ - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - - /* Does this pair's name match the given name? */ - if ((name == NULL || strcmp(nvpair_name(nvp), name) == 0) && - - /* Does this pair's type match the given type? */ - (type == DATA_TYPE_UNKNOWN || type == nvpair_type(nvp))) { - return (nvp); - } - } - - return (NULL); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_nvpair.h b/usr/src/cmd/lvm/metassist/common/volume_nvpair.h deleted file mode 100644 index de9aa134b0ff..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_nvpair.h +++ /dev/null @@ -1,467 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_NVPAIR_H -#define _VOLUME_NVPAIR_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * Get the named uint16 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint16 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_uint16(nvlist_t *attrs, char *which, uint16_t *val); - -/* - * Set the named uint16 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -extern int set_uint16(nvlist_t *attrs, char *which, uint16_t val); - -/* - * Get the named uint32 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint32 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_uint32(nvlist_t *attrs, char *which, uint32_t *val); - -/* - * Set the named uint32 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -extern int set_uint32(nvlist_t *attrs, char *which, uint32_t val); - -/* - * Get the named uint64 from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint64 - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_uint64(nvlist_t *attrs, char *which, uint64_t *val); - -/* - * Set the named uint64 in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -extern int set_uint64(nvlist_t *attrs, char *which, uint64_t val); - -/* - * Set the named boolean in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -extern int set_boolean(nvlist_t *attrs, char *which, boolean_t val); - -/* - * Get the named boolean from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param boolval - * RETURN: the value of the requested boolean - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_boolean(nvlist_t *attrs, char *which, boolean_t *boolval); - -/* - * Get the named string from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param str - * RETURN: the requested string - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_string(nvlist_t *attrs, char *which, char **str); - -/* - * Set the named string in the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value to set - * - * @return 0 - * if successful - * - * @return EINVAL - * if there is an invalid argument - * - * @return ENOMEM - * if there is insufficient memory - */ -extern int set_string(nvlist_t *attrs, char *which, char *val); - -/* - * Get the named uint16 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint16 array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_uint16_array( - nvlist_t *attrs, char *which, uint16_t **val, uint_t *nelem); - -/* - * Set the named uint16 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested uint16 array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int set_uint16_array( - nvlist_t *attrs, char *which, uint16_t *val, uint_t nelem); - -/* - * Get the named uint64 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested uint64 array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_uint64_array( - nvlist_t *attrs, char *which, uint64_t **val, uint_t *nelem); - -/* - * Set the named uint64 array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested uint64 array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int set_uint64_array( - nvlist_t *attrs, char *which, uint64_t *val, uint_t nelem); - -/* - * Get the named string array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * RETURN: the value of the requested string array - * - * @param nelem - * RETURN: the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int get_string_array( - nvlist_t *attrs, char *which, char ***val, uint_t *nelem); - -/* - * Set the named string array from the given nvlist_t. - * - * @param attrs - * the nvlist_t to search - * - * @param which - * the string key for this element in the list - * - * @param val - * the value of the requested string array - * - * @param nelem - * the number of elements in the array - * - * @return 0 - * if successful - * - * @return ENOENT - * if no matching name-value pair is found - * - * @return ENOTSUP - * if an encode/decode method is not supported - * - * @return EINVAL - * if there is an invalid argument - */ -extern int set_string_array( - nvlist_t *attrs, char *which, char **val, uint_t nelem); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_NVPAIR_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_output.c b/usr/src/cmd/lvm/metassist/common/volume_output.c deleted file mode 100644 index b6b177d0c595..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_output.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include - -#include "volume_output.h" -#include "volume_error.h" - -static int max_verbosity = OUTPUT_QUIET; -static FILE *output = NULL; - -/* - * Set the maximum level of verbosity to be reported to the user. - * Strings sent to oprintf() with a higher verbosity level than this - * maximum level will not be reported to the user. - * - * @param verbosity - * One of the predefined constants: - * OUTPUT_QUIET - * OUTPUT_TERSE - * OUTPUT_VERBOSE - * OUTPUT_DEBUG - * - * @param stream - * The stream to print all qualifying output to. - * - * @return 0 on success, non-zero otherwise. - */ -int -set_max_verbosity( - int verbosity, - FILE *stream) -{ - int error = 0; - - switch (verbosity) { - case OUTPUT_QUIET: - case OUTPUT_TERSE: - case OUTPUT_VERBOSE: - case OUTPUT_DEBUG: - max_verbosity = verbosity; - output = stream; - break; - - default: - volume_set_error( - gettext("%d: invalid verbosity level"), verbosity); - error = -1; - } - - return (error); -} - -/* - * Get the maximum level of verbosity to be reported to the user. - * - * @return OUTPUT_QUIET - * - * @return OUTPUT_TERSE - * - * @return OUTPUT_VERBOSE - * - * @return OUTPUT_DEBUG - */ -int -get_max_verbosity() -{ - return (max_verbosity); -} - -/* - * Prints the given formatted string arguments to a predefined stream, - * if the given verbosity is less than or equal to the set maximum - * verbosity. - * - * @param verbosity - * Same as for set_max_verbosity() - * - * @param fmt, ... - * printf-style arguments - * - * @return the number of characters output - * if successful - * - * @return negative value - * if unsuccessful - */ -int -oprintf( - int verbosity, - char *fmt, - ...) -{ - int ret; - va_list ap; - - va_start(ap, fmt); - ret = oprintf_va(verbosity, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * Identical to oprintf but with a va_list instead of variable length - * argument list. This function is provided for external printf-style - * wrappers. - * - * @param verbosity - * Same as for set_max_verbosity() - * - * @param fmt - * printf format string - * - * @param ap - * a va_list containing remaining printf-style arguments - * - * @return the number of characters output - * if successful - * - * @return negative value - * if unsuccessful - */ -/*PRINTFLIKE2*/ -int -oprintf_va( - int verbosity, - char *fmt, - va_list ap) -{ - int ret = 0; - - /* Is this verbosity high enough to print? */ - if (output != NULL && verbosity <= max_verbosity) { -#ifdef DEBUG - if (getenv(METASSIST_DEBUG_ENV) != NULL) { - time_t now = time(NULL); - struct tm *time = localtime(&now); - fprintf(output, "%.2d:%.2d:%.2d: ", - time->tm_hour, time->tm_min, time->tm_sec); - } -#endif - ret = vfprintf(output, fmt, ap); - } - - return (ret); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_output.h b/usr/src/cmd/lvm/metassist/common/volume_output.h deleted file mode 100644 index fbff0ed01cd2..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_output.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_OUTPUT_H -#define _VOLUME_OUTPUT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#ifdef DEBUG -/* - * The environment variable that must be set for metassist to - * enable debug output - */ -#define METASSIST_DEBUG_ENV "METASSIST_DEBUG" -#endif - -/* Verbosity levels */ -#define OUTPUT_QUIET 0 -#define OUTPUT_TERSE 1 -#define OUTPUT_VERBOSE 2 -#define OUTPUT_DEBUG 3 - -/* - * Set the maximum level of verbosity to be reported to the user. - * Strings sent to oprintf() with a higher verbosity level than this - * maximum level will not be reported to the user. - * - * @param verbosity - * One of the predefined constants: - * OUTPUT_QUIET - * OUTPUT_TERSE - * OUTPUT_VERBOSE - * OUTPUT_DEBUG - * - * @param stream - * The stream to print all qualifying output to. - * - * @return 0 on success, non-zero otherwise. - */ -extern int set_max_verbosity(int verbosity, FILE *stream); - -/* - * Get the maximum level of verbosity to be reported to the user. - * - * @return OUTPUT_QUIET - * - * @return OUTPUT_TERSE - * - * @return OUTPUT_VERBOSE - * - * @return OUTPUT_DEBUG - */ -extern int get_max_verbosity(); - -/* - * Prints the given formatted string arguments to a predefined stream, - * if the given verbosity is less than or equal to the set maximum - * verbosity. - * - * @param verbosity - * Same as for set_max_verbosity() - * - * @param fmt, ... - * printf-style arguments - * - * @return the number of characters output - * if successful - * - * @return negative value - * if unsuccessful - */ -extern int oprintf(int verbosity, char *fmt, ...); - -/* - * Identical to oprintf but with a va_list instead of variable length - * argument list. This function is provided for external printf-style - * wrappers. - * - * @param verbosity - * Same as for set_max_verbosity() - * - * @param fmt - * printf format string - * - * @param ap - * a va_list containing remaining printf-style arguments - * - * @return the number of characters output - * if successful - * - * @return negative value - * if unsuccessful - */ -extern int oprintf_va(int verbosity, char *fmt, va_list ap); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_OUTPUT_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_request.c b/usr/src/cmd/lvm/metassist/common/volume_request.c deleted file mode 100644 index 665616089c60..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_request.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include "volume_request.h" -#include "volume_error.h" - -/* - * Methods which manipulate a request_t struct - */ - -/* - * Constructor: Create a request_t struct. This request_t must be - * freed. - * - * @param request - * RETURN: a pointer to a new request_t - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -new_request( - request_t **request) -{ - int error; - devconfig_t *diskset_req; - devconfig_t *diskset_config; - - *request = (request_t *)calloc(1, sizeof (request_t)); - if (*request == NULL) { - (void) volume_set_error(gettext("new_request calloc() failed\n")); - return (-1); - } - - /* Create a new diskset_req */ - if ((error = new_devconfig(&diskset_req, TYPE_DISKSET)) != 0) { - free_request(*request); - return (error); - } - request_set_diskset_req(*request, diskset_req); - - /* Create a new diskset_config */ - if ((error = new_devconfig(&diskset_config, TYPE_DISKSET)) != 0) { - free_request(*request); - return (error); - } - request_set_diskset_config(*request, diskset_config); - - return (0); -} - -/* - * Free memory (recursively) allocated to a request_t struct - * - * @param arg - * pointer to the request_t struct to free - */ -void -free_request( - void *arg) -{ - request_t *request = (request_t *)arg; - - if (request == NULL) { - return; - } - - /* Free the diskset_req */ - if (request->diskset_req != NULL) { - free_devconfig(request->diskset_req); - } - - /* Free the diskset_config */ - if (request->diskset_config != NULL) { - free_devconfig(request->diskset_config); - } - - /* Free the devconfig itself */ - free(request); -} - -/* - * Set the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to modify - * - * @param diskset - * The devconfig_t representing the toplevel (disk set) - * device in the volume request hierarchy - */ -void -request_set_diskset_req( - request_t *request, - devconfig_t *diskset) -{ - request->diskset_req = diskset; -} - -/* - * Get the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to examine - * - * @return The devconfig_t representing the toplevel (disk set) - * device in the volume request hierarchy - */ -devconfig_t * -request_get_diskset_req( - request_t *request) -{ - return (request->diskset_req); -} - -/* - * Set/get the disk set at the top of the proposed volume hierarchy - * - * @param request - * The request_t representing the request to modify - * - * @param diskset - * The devconfig_t representing the toplevel (disk set) - * device in the proposed volume hierarchy - */ -void -request_set_diskset_config( - request_t *request, - devconfig_t *diskset) -{ - request->diskset_config = diskset; -} - -/* - * Get the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to examine - * - * @return The devconfig_t representing the toplevel (disk set) - * device in the proposed volume hierarchy - */ -devconfig_t * -request_get_diskset_config( - request_t *request) -{ - return (request->diskset_config); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_request.h b/usr/src/cmd/lvm/metassist/common/volume_request.h deleted file mode 100644 index 5ba1d06d817e..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_request.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_REQUEST_H -#define _VOLUME_REQUEST_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" - -/* - * request_t - struct to hold a layout request - */ -typedef struct request { - /* - * The devconfig_t representing the disk set at the top of the - * request hierarchy. This hierarchy represents the requested - * volume configuration, as read from the volume-request. - */ - devconfig_t *diskset_req; - - /* - * The devconfig_t representing the disk set at the top of the - * resulting proposed volume hierarchy. This hierarchy - * represents the volume configuration proposed by the layout - * engine. This configuration will eventually be converted to - * a volume-spec. - */ - devconfig_t *diskset_config; -} request_t; - -/* - * Constructor: Create a request_t struct. This request_t must be - * freed. - * - * @param request - * RETURN: a pointer to a new request_t - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int new_request(request_t **request); - -/* - * Free memory (recursively) allocated to a request_t struct - * - * @param arg - * pointer to the request_t struct to free - */ -extern void free_request(void *arg); - -/* - * Set the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to modify - * - * @param diskset - * The devconfig_t representing the toplevel (disk set) - * device in the volume request hierarchy - */ -extern void request_set_diskset_req(request_t *request, devconfig_t *diskset); - -/* - * Get the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to examine - * - * @return The devconfig_t representing the toplevel (disk set) - * device in the volume request hierarchy - */ -extern devconfig_t *request_get_diskset_req(request_t *request); - -/* - * Set/get the disk set at the top of the proposed volume hierarchy - * - * @param request - * The request_t representing the request to modify - * - * @param diskset - * The devconfig_t representing the toplevel (disk set) - * device in the proposed volume hierarchy - */ -extern void request_set_diskset_config( - request_t *request, devconfig_t *diskset); - -/* - * Get the disk set at the top of the request hierarchy - * - * @param request - * The request_t representing the request to examine - * - * @return The devconfig_t representing the toplevel (disk set) - * device in the proposed volume hierarchy - */ -extern devconfig_t *request_get_diskset_config(request_t *request); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_REQUEST_H */ diff --git a/usr/src/cmd/lvm/metassist/common/volume_string.c b/usr/src/cmd/lvm/metassist/common/volume_string.c deleted file mode 100644 index 0391fa7fc2dc..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_string.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "volume_string.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include "volume_error.h" -#include "volume_output.h" - -/* - * ****************************************************************** - * - * Function prototypes - * - * ****************************************************************** - */ - -static void *append_to_pointer_array(void **array, void *pointer); - -/* - * ****************************************************************** - * - * Data - * - * ****************************************************************** - */ - -/* All-inclusive valid size units */ -units_t universal_units[] = { - {"BLOCKS", BYTES_PER_BLOCK}, - {"KB", BYTES_PER_KILOBYTE}, - {"MB", BYTES_PER_MEGABYTE}, - {"GB", BYTES_PER_GIGABYTE}, - {"TB", BYTES_PER_TERABYTE}, - {NULL, 0} -}; - -/* - * ****************************************************************** - * - * External functions - * - * ****************************************************************** - */ - -/* - * Concatenates a list of strings. The result must be free()d. - * - * @param numargs - * The number of strings to concatenate. - * - * @param ... - * The strings (type char *) to concatenate. - * - * @return the concatenated string - * if succesful - * - * @return NULL - * if memory could not be allocated - */ -char * -stralloccat( - int numargs, - ...) -{ - va_list vl; - int i; - int len = 1; - char *cat; - - /* Determine length of concatenated string */ - va_start(vl, numargs); - for (i = 0; i < numargs; i++) { - char *str = va_arg(vl, char *); - if (str != NULL) { - len += strlen(str); - } - } - va_end(vl); - - /* Allocate memory for concatenation plus a trailing NULL */ - cat = (char *)calloc(1, len * sizeof (char)); - - if (cat == NULL) { - return (NULL); - } - - /* Concatenate strings */ - va_start(vl, numargs); - for (i = 0; i < numargs; i++) { - char *str = va_arg(vl, char *); - if (str != NULL) { - strcat(cat, str); - } - } - va_end(vl); - - return (cat); -} - -/* - * Convert the given string to a uint16_t, verifying that the value - * does not exceed the lower or upper bounds of a uint16_t. - * - * @param str - * the string to convert - * - * @param num - * the addr of the uint16_t - * - * @return 0 - * if the given string was converted to a uint16_t - * - * @return -1 - * if the string could could not be converted to a number - * - * @return -2 - * if the converted number exceeds the lower or upper - * bounds of a uint16_t - */ -int -str_to_uint16( - char *str, - uint16_t *num) -{ - long long lnum; - int error = 0; - - /* Convert string to long long */ - if (sscanf(str, "%lld", &lnum) != 1) { - error = -1; - } else { - - /* - * Verify that the long long value does not exceed the - * lower or upper bounds of a uint16_t - */ - - /* Maximum value of uint16_t */ - uint16_t max = (uint16_t)~0ULL; - - if (lnum < 0 || lnum > max) { - error = -2; - } else { - *num = lnum; - } - } - - return (error); -} - -/* - * Converts the given long long into a string. This string must be - * freed. - * - * @param num - * the long long to convert - * - * @param str - * the addr of the string - * - * @return 0 - * if successful - * - * @return ENOMEM - * if the physical limits of the system are exceeded by - * size bytes of memory which cannot be allocated - * - * @return EAGAIN - * if there is not enough memory available to allocate - * size bytes of memory - */ -int -ll_to_str( - long long num, - char **str) -{ - int error = 0; - - /* Allocate memory for the string */ - if ((*str = calloc(1, LONG_LONG_STR_SIZE * sizeof (char))) == NULL) { - error = errno; - } else { - /* Convert the integer to a string */ - snprintf(*str, LONG_LONG_STR_SIZE, "%lld", num); - } - - return (error); -} - -/* - * Convert a size specification to bytes. - * - * @param str - * a size specification strings of the form - * , where valid are specified by - * the units argument and is the (floating-point) - * multiplier of the units - * - * @param bytes - * RETURN: the result of converting the given size string - * to bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -sizestr_to_bytes( - char *str, - uint64_t *bytes, - units_t units[]) -{ - char *unit_str; - long double d; - int error = 0; - int i; - - /* Convert string to double */ - if ((d = strtod(str, &unit_str)) == 0) { - volume_set_error(gettext("invalid size string: %s"), str); - error = -1; - } else { - - /* Trim leading white space */ - while (isspace(*unit_str) != 0) { - ++unit_str; - } - - /* Convert to bytes based on */ - for (i = 0; units[i].unit_str != NULL; i++) { - if (strcasecmp(unit_str, units[i].unit_str) == 0) { - d *= units[i].bytes_per_unit; - break; - } - } - - /* Was a valid unit string found? */ - if (units[i].unit_str == NULL) { - volume_set_error( - gettext("missing or invalid units indicator in size: %s"), - str); - error = -1; - } - } - - if (error) { - *bytes = 0; - } else { - *bytes = (uint64_t)d; - oprintf(OUTPUT_DEBUG, - gettext("converted \"%s\" to %llu bytes\n"), str, *bytes); - } - - return (error); -} - -/* - * Convert bytes to a size specification string. - * - * @param bytes - * the number of bytes - * - * @param str - * RETURN: a size specification strings of the form - * , where valid are specified by - * the units argument and is the (floating-point) - * multiplier of the units. This string must be freed. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -int -bytes_to_sizestr( - uint64_t bytes, - char **str, - units_t units[], - boolean_t round) -{ - int i, len, error = 0; - double value; - const char *format; - units_t use_units = units[0]; - - /* Determine the units to use */ - for (i = 0; units[i].unit_str != NULL; i++) { - if (bytes >= units[i].bytes_per_unit) { - use_units = units[i]; - } - } - - value = ((long double)bytes / use_units.bytes_per_unit); - - /* Length of string plus trailing NULL */ - len = LONG_LONG_STR_SIZE + strlen(use_units.unit_str) + 1; - - if (round) { - value = floor(value + 0.5F); - format = "%.0f%s"; - } else { - format = "%.2f%s"; - } - - /* Append units to string */ - *str = calloc(1, len * sizeof (char)); - if (*str == NULL) { - error = errno; - } else { - snprintf(*str, len, format, value, use_units.unit_str); - } - - return (error); -} - -/* - * Appends a copy of the given string to the given string array, - * ensuring that the last element in the array is NULL. This array - * must be freed via free_string_array. - * - * Note when an error occurs and NULL is returned, array is not freed. - * Subsequently callers should save a pointer to the original array - * until success is verified. - * - * @param array - * the array to append to, or NULL to create a new array - * - * @param str - * the string to copy and add to the array - * - * @return a pointer to the realloc'd (and possibly moved) array - * if succesful - * - * @return NULL - * if unsuccesful - */ -char ** -append_to_string_array( - char **array, - char *str) -{ - char *copy = strdup(str); - - if (copy == NULL) { - return (NULL); - } - - return ((char **)append_to_pointer_array((void **)array, copy)); -} - -/* - * Frees each element of the given string array, then frees the array - * itself. - * - * @param array - * a NULL-terminated string array - */ -void -free_string_array( - char **array) -{ - int i; - - /* Free each available element */ - for (i = 0; array[i] != NULL; i++) { - free(array[i]); - } - - /* Free the array itself */ - free((void *)array); -} - -/* - * ****************************************************************** - * - * Static functions - * - * ****************************************************************** - */ - -/* - * Appends the given pointer to the given pointer array, ensuring that - * the last element in the array is NULL. - * - * Note when an error occurs and NULL is returned, array is not freed. - * Subsequently callers should save a pointer to the original array - * until success is verified. - * - * @param array - * the array to append to, or NULL to create a new array - * - * @param pointer - * the pointer to add to the array - * - * @return a pointer to the realloc'd (and possibly moved) array - * if succesful - * - * @return NULL - * if unsuccesful - */ -static void * -append_to_pointer_array( - void **array, - void *pointer) -{ - void **newarray = NULL; - int i = 0; - - if (array != NULL) { - /* Count the elements currently in the array */ - for (i = 0; array[i] != NULL; ++i); - } - - /* realloc, adding a slot for the new pointer */ - newarray = (void **)realloc(array, (i + 2) * sizeof (*array)); - - if (newarray != NULL) { - /* Append pointer and terminal NULL */ - newarray[i] = pointer; - newarray[i+1] = NULL; - } - - return (newarray); -} diff --git a/usr/src/cmd/lvm/metassist/common/volume_string.h b/usr/src/cmd/lvm/metassist/common/volume_string.h deleted file mode 100644 index a66b38b23cf3..000000000000 --- a/usr/src/cmd/lvm/metassist/common/volume_string.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_STRING_H -#define _VOLUME_STRING_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * The length of the string when the longest long long is converted to - * a string - */ -#define LONG_LONG_STR_SIZE 128 - -#define BYTES_PER_BLOCK 512 -#define BYTES_PER_KILOBYTE 1024 -#define BYTES_PER_MEGABYTE 1024 * 1024 -#define BYTES_PER_GIGABYTE 1024 * 1024 * 1024 -#define BYTES_PER_TERABYTE (uint64_t)1024 * 1024 * 1024 * 1024 - -/* - * Describes units when converting from bytes to string and back. - */ -typedef struct { - char *unit_str; - uint64_t bytes_per_unit; -} units_t; - -/* All-inclusive valid size units */ -extern units_t universal_units[]; - -/* - * Concatenates a list of strings. The result must be free()d. - * - * @param numargs - * The number of strings to concatenate. - * - * @param ... - * The strings (type char *) to concatenate. - * - * @return the concatenated string - * if succesful - * - * @return NULL - * if memory could not be allocated - */ -extern char *stralloccat(int numargs, ...); - -/* - * Convert the given string to a uint16_t, verifying that the value - * does not exceed the lower or upper bounds of a uint16_t. - * - * @param str - * the string to convert - * - * @param num - * the addr of the uint16_t - * - * @return 0 - * if the given string was converted to a uint16_t - * - * @return -1 - * if the string could could not be converted to a number - * - * @return -2 - * if the converted number exceeds the lower or upper - * bounds of a uint16_t - */ -extern int str_to_uint16(char *str, uint16_t *num); - -/* - * Converts the given long long into a string. This string must be - * freed. - * - * @param num - * the long long to convert - * - * @param str - * the addr of the string - * - * @return 0 - * if successful - * - * @return ENOMEM - * if the physical limits of the system are exceeded by - * size bytes of memory which cannot be allocated - * - * @return EAGAIN - * if there is not enough memory available to allocate - * size bytes of memory - */ -extern int ll_to_str(long long num, char **str); - -/* - * Convert a size specification to bytes. - * - * @param str - * a size specification strings of the form - * , where valid are specified by - * the units argument and is the (floating-point) - * multiplier of the units - * - * @param bytes - * RETURN: the result of converting the given size string - * to bytes - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int sizestr_to_bytes(char *str, uint64_t *bytes, units_t units[]); - -/* - * Convert bytes to a size specification string. - * - * @param bytes - * the number of bytes - * - * @param str - * RETURN: a size specification strings of the form - * , where valid are specified by - * the units argument and is the (floating-point) - * multiplier of the units. This string must be freed. - * - * @return 0 - * if successful - * - * @return non-zero - * if an error occurred. Use get_error_string() to - * retrieve the associated error message. - */ -extern int bytes_to_sizestr( - uint64_t bytes, char **str, units_t units[], boolean_t round); - -/* - * Appends a copy of the given string to the given string array, - * ensuring that the last element in the array is NULL. This array - * must be freed via free_string_array. - * - * Note when an error occurs and NULL is returned, array is not freed. - * Subsequently callers should save a pointer to the original array - * until success is verified. - * - * @param array - * the array to append to, or NULL to create a new array - * - * @param str - * the string to copy and add to the array - * - * @return a pointer to the realloc'd (and possibly moved) array - * if succesful - * - * @return NULL - * if unsuccesful - */ -extern char ** append_to_string_array(char **array, char *str); - -/* - * Frees each element of the given string array, then frees the array - * itself. - * - * @param array - * a NULL-terminated string array - */ -extern void free_string_array(char **array); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_STRING_H */ diff --git a/usr/src/cmd/lvm/metassist/controller/Makefile b/usr/src/cmd/lvm/metassist/controller/Makefile deleted file mode 100644 index 137b867eaf4a..000000000000 --- a/usr/src/cmd/lvm/metassist/controller/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -METASSIST_TOPLEVEL = .. - -SRCS = getopt_ext.c metassist.c -OBJS = $(SRCS:%.c=%.o) -HDRS = $(SRCS:%.c=%.h) -MSGFILES = $(SRCS:%.c=%.i) - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -INCLUDES += -I.. -I../common -I../xml -I../layout \ - -I$(ADJUNCT_PROTO)/usr/include/libxml2 -CFLAGS += $(INCLUDES) - -POFILE = controllerp.po - -include $(METASSIST_TOPLEVEL)/Makefile.targ - -# Build .po file from message files -$(POFILE): $(MSGFILES) - $(BUILDPO.msgfiles) - -cstyle: - $(CSTYLE) $(CSTYLE_FLAGS) $(SRCS) $(HDRS) - -hdrchk: - $(HDRCHK) $(HDRCHK_FLAGS) $(HDRS) diff --git a/usr/src/cmd/lvm/metassist/controller/getopt_ext.c b/usr/src/cmd/lvm/metassist/controller/getopt_ext.c deleted file mode 100644 index 459bb9f8dbda..000000000000 --- a/usr/src/cmd/lvm/metassist/controller/getopt_ext.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include "volume_error.h" -#include "getopt_ext.h" - -/* - * Functions - */ - -/* - * Identical to getopt(3), except that - * - * 1. If "-" is the first character of optstring, each non-option argv - * element is handled as if it were the argument of an option with - * character code GETOPT_NON_OPTION_ARG. The result is that - * GETOPT_DONE_PARSING will not be returned until the end of the - * argument list has been reached. - * - * This mirrors the functionality provided by GNU getopt. - * - * 2. GETOPT_ERR_INVALID_OPT or GETOPT_ERR_MISSING_ARG is returned - * instead of '?'. Subsequently "-?" can be used as a valid - * option. - * - * 3. GETOPT_DONE_PARSING, GETOPT_ERR_INVALID_ARG, or - * GETOPT_NON_OPTION_ARG is returned instead of -1. - * - * @param argc - * The number of arguments in the array - * - * @param argv - * The argument array - * - * @param optstring - * The option letters, with ':' following options with - * required arguments. See note about "-" as the first - * character. - * - * @return GETOPT_ERR_INVALID_OPT - * if the option is not found in optstring - * - * GETOPT_ERR_MISSING_ARG - * if the option requires an argument which is missing - * - * GETOPT_ERR_INVALID_ARG - * if "-" is not the first character in optstring and a - * non-option argument is encountered - * - * GETOPT_NON_OPTION_ARG - * if "-" is the first character in optstring and a - * non-option argument is encountered - * - * GETOPT_DONE_PARSING - * if the end of the argument list is reached - * - * - * the option character itself, if none of the above - * scenarios applies. - */ -extern int -getopt_ext( - int argc, - char * const argv[], - const char *optstring) -{ - int c; - int handle_non_options = (*optstring == '-'); - - /* Is "-" the first character of optstring? */ - if (handle_non_options) { - /* getopt(3) doesn't understand "-" */ - optstring++; - } - - switch (c = getopt(argc, argv, optstring)) { - - /* - * getopt(3) returns -1 when 1) it encounters a non-option - * argument or 2) reaches the end of the argument list. - * Distinguish from the two possibilities. - */ - case -1: - if (optind < argc) { - optarg = argv[optind]; - - /* Non-option argument found */ - if (handle_non_options) { - /* Non-option arguments are valid */ - c = GETOPT_NON_OPTION_ARG; - optind++; - } else { - /* Non-option arguments are invalid */ - c = GETOPT_ERR_INVALID_ARG; - } - } else { - /* End of the argument list reached */ - c = GETOPT_DONE_PARSING; - } - break; - - /* - * getopt(3) returns '?' when 1) the "-?" option is - * encountered, 2) an invalid option is given or 3) a - * valid option requiring an argument is found but no - * argument is specified. Distinguish from the three - * possibilities. - */ - case '?': - /* Is this an error or was -? encountered? */ - if (optopt != '?') { - if (strchr(optstring, optopt) == NULL) { - /* Invalid option */ - c = GETOPT_ERR_INVALID_OPT; - optarg = argv[optind-1]; - } else { - /* Valid option without required argument */ - c = GETOPT_ERR_MISSING_ARG; - } - } - } - - return (c); -} diff --git a/usr/src/cmd/lvm/metassist/controller/getopt_ext.h b/usr/src/cmd/lvm/metassist/controller/getopt_ext.h deleted file mode 100644 index 6b36477ce041..000000000000 --- a/usr/src/cmd/lvm/metassist/controller/getopt_ext.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _GETOPTEXT_H -#define _GETOPTEXT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Returned chars for getopt_ext - */ - -/* A non-option argument was found */ -#define GETOPT_NON_OPTION_ARG 1 - -/* All arguments have been parsed */ -#define GETOPT_DONE_PARSING -1 - -/* An invalid option was found */ -#define GETOPT_ERR_INVALID_OPT -2 - -/* An invalid non-option argument was found */ -#define GETOPT_ERR_INVALID_ARG -3 - -/* No argument for valid option expecting an argument */ -#define GETOPT_ERR_MISSING_ARG -4 - -/* - * Function prototypes - */ - -/* - * Identical to getopt(3), except that - * - * 1. If "-" is the first character of optstring, each non-option argv - * element is handled as if it were the argument of an option with - * character code GETOPT_NON_OPTION_ARG. The result is that - * GETOPT_DONE_PARSING will not be returned until the end of the - * argument list has been reached. - * - * This mirrors the functionality provided by GNU getopt. - * - * 2. GETOPT_ERR_INVALID_OPT or GETOPT_ERR_MISSING_ARG is returned - * instead of '?'. Subsequently "-?" can be used as a valid - * option. - * - * 3. GETOPT_DONE_PARSING, GETOPT_ERR_INVALID_ARG, or - * GETOPT_NON_OPTION_ARG is returned instead of -1. - * - * @param argc - * The number of arguments in the array - * - * @param argv - * The argument array - * - * @param optstring - * The option letters, with ':' following options with - * required arguments. See note about "-" as the first - * character. - * - * @return GETOPT_ERR_INVALID_OPT - * if the option is not found in optstring - * - * GETOPT_ERR_MISSING_ARG - * if the option requires an argument which is missing - * - * GETOPT_ERR_INVALID_ARG - * if "-" is not the first character in optstring and a - * non-option argument is encountered - * - * GETOPT_NON_OPTION_ARG - * if "-" is the first character in optstring and a - * non-option argument is encountered - * - * GETOPT_DONE_PARSING - * if the end of the argument list is reached - * - * - * the option character itself, if none of the above - * scenarios applies. - */ -extern int getopt_ext(int argc, char * const argv[], const char *optstring); - -#ifdef __cplusplus -} -#endif - -#endif /* _GETOPTEXT_H */ diff --git a/usr/src/cmd/lvm/metassist/controller/metassist.c b/usr/src/cmd/lvm/metassist/controller/metassist.c deleted file mode 100644 index aeb9655937c8..000000000000 --- a/usr/src/cmd/lvm/metassist/controller/metassist.c +++ /dev/null @@ -1,1511 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Front end CLI to metassist. Parses command line, reads in data - * files, provides main() entry point into metassist. Here's the - * complete data validation stack for the project: - * - * 1. Controller validates command line syntax/order of arguments. - * - * 2. XML parser validates XML syntax, conformance with DTD - * - * 3. xml_convert validates proper conversion from string to - * size/integer/float/boolean/etc. - * - * 4. devconfig_t mutators validate limits/boundaries/min/max/names of - * data. References md_mdiox.h and possibly libmeta. - * - * 5. layout validates on remaining issues, including existence of - * given devices, feasibility of request, suitability of specified - * components, and subtle misuse of data structure (like both size - * and components specified). - */ - -#include "metassist.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "getopt_ext.h" -#include "locale.h" -#include "volume_error.h" -#include "volume_output.h" -#include "volume_request.h" -#include "volume_defaults.h" -#include "volume_string.h" -#include "xml_convert.h" -#include "layout.h" - -/* - * Function prototypes - */ - -static void clean_up(); -static void interrupthandler(int x); -static int copy_arg(char *option, char *value, char **saveto); -static xmlDocPtr create_volume_request_XML(); -static int handle_common_opts(int c, boolean_t *handled); -static int parse_create_opts(int argc, char *argv[]); -static int parse_opts(int argc, char *argv[]); -static int parse_tokenized_list(const char *string, dlist_t **list); -static int parse_verbose_arg(char *arg, int *verbosity); -static void print_help_create(FILE *stream); -static void print_help_main(FILE *stream); -static void print_manual_reference(FILE *stream); -static void print_usage(FILE *stream); -static void print_usage_create(FILE *stream); -static void print_usage_main(FILE *stream); -static int print_version(FILE *stream); -static int get_doc_from_file( - char *file, char **valid_types, xmlDocPtr *doc, char **root); -static int get_volume_request_or_config(xmlDocPtr *doc, char **root); -static int handle_commands(char *commands); -static int handle_config(devconfig_t *config); -static int handle_request(request_t *request, defaults_t *defaults); -static int write_temp_file(char *text, mode_t mode, char **file); - -/* - * Data - */ - -/* Holds argv[0] */ -char *progname; - -/* The action to take */ -int action = ACTION_EXECUTE; - -/* Holds the name of the temporary command file */ -char *commandfile = NULL; - -/* The metassist subcommand */ -int subcmd = SUBCMD_NONE; - -/* The volume-request XML file to read */ -char *arg_inputfile = NULL; - -/* The size of the requested volume */ -char *arg_size = NULL; - -/* The disk set to use */ -char *arg_diskset = NULL; - -/* The volume name to use */ -char *arg_name = NULL; - -/* Redundancy level */ -char *arg_redundancy = NULL; - -/* Number of datapaths */ -char *arg_datapaths = NULL; - -/* Whether to implement fault recovery */ -boolean_t faultrecovery = B_FALSE; - -/* Whether to output the config file */ -boolean_t output_configfile = B_FALSE; - -/* Whether to output the command file instead of */ -boolean_t output_commandfile = B_FALSE; - -/* List of available devices */ -dlist_t *available = NULL; - -/* List of unavailable devices */ -dlist_t *unavailable = NULL; - -/* - * Functions - */ - -/* - * Frees alloc'd memory, to be called prior to exiting. - */ -static void -clean_up() -{ - /* Remove temporary command file */ - if (commandfile != NULL) { - /* Ignore failure */ - unlink(commandfile); - } - - /* Free allocated argument strings */ - if (commandfile != NULL) free(commandfile); - if (arg_diskset != NULL) free(arg_diskset); - if (arg_name != NULL) free(arg_name); - if (arg_inputfile != NULL) free(arg_inputfile); - - /* Free available dlist and strings within */ - dlist_free_items(available, free); - - /* Free unavailable dlist and strings within */ - dlist_free_items(unavailable, free); - - /* Clean up XML data structures */ - cleanup_xml(); -} - -/* - * Signal handler, called to exit gracefully - */ -static void -interrupthandler( - int sig) -{ - char sigstr[SIG2STR_MAX]; - - if (sig2str(sig, sigstr) != 0) { - sigstr[0] = '\0'; - } - - fprintf(stderr, - gettext("Signal %d (%s) caught -- exiting...\n"), sig, sigstr); - - /* Allow layout to cleanup on abnormal exit */ - layout_clean_up(); - - clean_up(); - exit(1); -} - -/* - * Copies and saves the given argument, verifying that the argument - * has not already been saved. - * - * @param option - * The flag preceding or type of the argument. Used only - * in the error message when an option has already been - * saved to *saveto. - * - * @param value - * The argument to be copied. - * - * @param saveto - * Changed to point to the copied data. This must point - * to NULL data initially, or it will be assumed that - * this argument has already been set. This memory must - * be free()d by the caller. - * - * @return 0 on success, non-zero otherwise. - */ -static int -copy_arg( - char *option, - char *value, - char **saveto) -{ - int error = 0; - - /* Has this string already been set? */ - if (*saveto != NULL) { - volume_set_error( - gettext("%s: option specified multiple times"), option); - error = -1; - } else - - if ((*saveto = strdup(value)) == NULL) { - error = ENOMEM; - } - - return (error); -} - -/* - * Generates the XML volume request corresponding to the command-line - * parameters. No DTD node is included in this request. - * - * @return The XML request, or NULL if an error ocurred in - * generating the text. This memory must be freed with - * XMLFree(). - */ -static xmlDocPtr -create_volume_request_XML() -{ - xmlDocPtr doc; - xmlNodePtr request, volume; - - /* Create the XML document */ - doc = xmlNewDoc((xmlChar *)"1.0"); - - /* Create the root node */ - request = xmlNewDocNode( - doc, NULL, (xmlChar *)ELEMENT_VOLUMEREQUEST, NULL); - xmlAddChild((xmlNodePtr) doc, (xmlNodePtr)request); - - /* diskset element */ - if (arg_diskset != NULL) { - xmlNodePtr node = xmlNewChild( - request, NULL, (xmlChar *)ELEMENT_DISKSET, NULL); - xmlSetProp(node, - (xmlChar *)ATTR_NAME, (xmlChar *)arg_diskset); - } - - /* available elements */ - if (available != NULL) { - dlist_t *item; - for (item = available; item != NULL; item = item->next) { - xmlNodePtr node = xmlNewChild( - request, NULL, (xmlChar *)ELEMENT_AVAILABLE, NULL); - xmlSetProp(node, - (xmlChar *)ATTR_NAME, (xmlChar *)item->obj); - } - } - - /* unavailable elements */ - if (unavailable != NULL) { - dlist_t *item; - for (item = unavailable; item != NULL; item = item->next) { - xmlNodePtr node = xmlNewChild( - request, NULL, (xmlChar *)ELEMENT_UNAVAILABLE, NULL); - xmlSetProp(node, - (xmlChar *)ATTR_NAME, (xmlChar *)item->obj); - } - } - - /* volume element */ - volume = xmlNewChild(request, NULL, (xmlChar *)ELEMENT_VOLUME, NULL); - - /* Volume name - optional */ - if (arg_name != NULL) { - xmlSetProp(volume, - (xmlChar *)ATTR_NAME, (xmlChar *)arg_name); - } - - /* Volume size - required */ - xmlSetProp(volume, (xmlChar *)ATTR_SIZEINBYTES, (xmlChar *)arg_size); - - /* Volume redundancy - optional */ - if (arg_redundancy != NULL) { - xmlSetProp(volume, - (xmlChar *)ATTR_VOLUME_REDUNDANCY, (xmlChar *)arg_redundancy); - } - - /* Volume fault recovery - optional */ - if (faultrecovery == B_TRUE) { - xmlSetProp(volume, - (xmlChar *)ATTR_VOLUME_FAULTRECOVERY, (xmlChar *)"TRUE"); - } - - /* Volume datapaths - optional */ - if (arg_datapaths != NULL) { - xmlSetProp(volume, - (xmlChar *)ATTR_VOLUME_DATAPATHS, (xmlChar *)arg_datapaths); - } - - if (get_max_verbosity() >= OUTPUT_DEBUG) { - xmlChar *text; - /* Get the text dump */ - xmlDocDumpFormatMemory(doc, &text, NULL, 1); - oprintf(OUTPUT_DEBUG, - gettext("Generated volume-request:\n%s"), text); - xmlFree(text); - } - - return (doc); -} - -/* - * Checks the given flag for options common to all subcommands. - * - * @param c - * The option letter. - * - * @param handled - * RETURN: whether the given option flag was handled. - * - * @return Non-zero if an error occurred or the given option was - * invalid or incomplete, 0 otherwise. - */ -static int -handle_common_opts( - int c, - boolean_t *handled) -{ - int error = 0; - - /* Level of verbosity to report */ - int verbosity; - - *handled = B_TRUE; - - switch (c) { - case COMMON_SHORTOPT_VERBOSITY: - if ((error = parse_verbose_arg(optarg, &verbosity)) == 0) { - set_max_verbosity(verbosity, stderr); - } - break; - - case COMMON_SHORTOPT_VERSION: - if ((error = print_version(stdout)) == 0) { - clean_up(); - exit(0); - } - break; - - case GETOPT_ERR_MISSING_ARG: - volume_set_error( - gettext("option missing a required argument: -%c"), optopt); - error = -1; - break; - - case GETOPT_ERR_INVALID_OPT: - volume_set_error(gettext("invalid option: -%c"), optopt); - error = -1; - break; - - case GETOPT_ERR_INVALID_ARG: - volume_set_error(gettext("invalid argument: %s"), optarg); - error = -1; - break; - - default: - *handled = B_FALSE; - } - - return (error); -} - -/* - * Parse the command line options for the create subcommand. - * - * @param argc - * The number of arguments in the array - * - * @param argv - * The argument array - */ -static int -parse_create_opts( - int argc, - char *argv[]) -{ - int c; - int error = 0; - - /* - * Whether a volume request is specified on the command line - * (vs. a inputfile) - */ - boolean_t request_on_command_line = B_FALSE; - - /* Examine next arg */ - while (!error && (c = getopt_ext( - argc, argv, CREATE_SHORTOPTS)) != GETOPT_DONE_PARSING) { - - boolean_t handled; - - /* Check for args common to all scopes */ - error = handle_common_opts(c, &handled); - if (error == 0 && handled == B_FALSE) { - - /* Check for args specific to this scope */ - switch (c) { - - /* Help */ - case COMMON_SHORTOPT_HELP: - print_help_create(stdout); - clean_up(); - exit(0); - break; - - /* Config file */ - case CREATE_SHORTOPT_CONFIGFILE: - action &= ~ACTION_EXECUTE; - action |= ACTION_OUTPUT_CONFIG; - break; - - /* Command file */ - case CREATE_SHORTOPT_COMMANDFILE: - action &= ~ACTION_EXECUTE; - action |= ACTION_OUTPUT_COMMANDS; - break; - - /* Disk set */ - case CREATE_SHORTOPT_DISKSET: - error = copy_arg( - argv[optind - 2], optarg, &arg_diskset); - request_on_command_line = B_TRUE; - break; - - /* Name */ - case CREATE_SHORTOPT_NAME: - error = copy_arg( - argv[optind - 2], optarg, &arg_name); - request_on_command_line = B_TRUE; - break; - - /* Redundancy */ - case CREATE_SHORTOPT_REDUNDANCY: - error = copy_arg( - argv[optind - 2], optarg, &arg_redundancy); - request_on_command_line = B_TRUE; - break; - - /* Data paths */ - case CREATE_SHORTOPT_DATAPATHS: - error = copy_arg( - argv[optind - 2], optarg, &arg_datapaths); - request_on_command_line = B_TRUE; - break; - - /* Fault recovery */ - case CREATE_SHORTOPT_FAULTRECOVERY: - faultrecovery = B_TRUE; - request_on_command_line = B_TRUE; - break; - - /* Available devices */ - case CREATE_SHORTOPT_AVAILABLE: - error = parse_tokenized_list(optarg, &available); - request_on_command_line = B_TRUE; - break; - - /* Unavailable devices */ - case CREATE_SHORTOPT_UNAVAILABLE: - error = parse_tokenized_list(optarg, &unavailable); - request_on_command_line = B_TRUE; - break; - - /* Size */ - case CREATE_SHORTOPT_SIZE: - request_on_command_line = B_TRUE; - error = copy_arg( - argv[optind - 1], optarg, &arg_size); - break; - - /* Input file */ - case CREATE_SHORTOPT_INPUTFILE: - error = copy_arg(gettext("request/configuration file"), - optarg, &arg_inputfile); - break; - - default: - /* Shouldn't be here! */ - volume_set_error( - gettext("unexpected option: %c (%d)"), c, c); - error = -1; - } - } - } - - /* - * Now that the arguments have been parsed, verify that - * required options were specified. - */ - if (!error) { - /* Third invocation method -- two required arguments */ - if (request_on_command_line == B_TRUE) { - if (arg_inputfile != NULL) { - volume_set_error( - gettext("invalid option(s) specified with input file")); - error = -1; - } else - - if (arg_size == NULL) { - volume_set_error(gettext("no size specified")); - error = -1; - } else - - if (arg_diskset == NULL) { - volume_set_error(gettext("no disk set specified")); - error = -1; - } - } else - - /* First or second invocation method -- one required argument */ - if (arg_inputfile == NULL) { - volume_set_error(gettext("missing required arguments")); - error = -1; - } - - /* - * The CREATE_SHORTOPT_CONFIGFILE and - * CREATE_SHORTOPT_COMMANDFILE arguments are mutually - * exclusive. Verify that these were not both specified. - */ - if (!error && - action & ACTION_OUTPUT_CONFIG && - action & ACTION_OUTPUT_COMMANDS) { - volume_set_error( - gettext("-%c and -%c are mutually exclusive"), - CREATE_SHORTOPT_CONFIGFILE, - CREATE_SHORTOPT_COMMANDFILE); - error = -1; - } - } - - return (error); -} - -/* - * Parse the main command line options. - * - * @param argc - * The number of arguments in the array - * - * @param argv - * The argument array - * - * @return 0 on success, non-zero otherwise. - */ -static int -parse_opts( - int argc, - char *argv[]) -{ - int c; - int error = 0; - - /* Examine next arg */ - while (!error && (c = getopt_ext( - argc, argv, MAIN_SHORTOPTS)) != GETOPT_DONE_PARSING) { - - boolean_t handled; - - /* Check for args common to all scopes */ - error = handle_common_opts(c, &handled); - - if (error == 0 && handled == B_FALSE) { - - /* Check for args specific to this scope */ - switch (c) { - - /* Help */ - case COMMON_SHORTOPT_HELP: - print_help_main(stdout); - clean_up(); - exit(0); - break; - - /* Non-option arg */ - case GETOPT_NON_OPTION_ARG: - - /* See if non-option arg is subcommand */ - if (strcmp(optarg, MAIN_SUBCMD_CREATE) == 0) { - subcmd = SUBCMD_CREATE; - error = parse_create_opts(argc, argv); - } else { - /* Argument not recognized */ - volume_set_error( - gettext("%s: invalid argument"), optarg); - error = -1; - } - break; - - default: - /* Shouldn't be here! */ - volume_set_error( - gettext("unexpected option: %c (%d)"), c, c); - error = -1; - } - } else - - /* - * Check invalid arguments to see if they are valid - * options out of place. - * - * NOTE: IN THE FUTURE, A CODE BLOCK SIMILAR TO THIS - * ONE SHOULD BE ADDED FOR EACH NEW SUBCOMMAND. - */ - if (c == GETOPT_ERR_INVALID_OPT && - strchr(CREATE_SHORTOPTS, optopt) != NULL) { - /* Provide a more enlightening error message */ - volume_set_error( - gettext("-%c specified before create subcommand"), optopt); - } - } - - /* Parsing appears to be successful */ - if (!error) { - - /* Was a subcommand specified? */ - if (subcmd == SUBCMD_NONE) { - volume_set_error(gettext("no subcommand specified")); - error = -1; - } - } - - return (error); -} - -/* - * Convert a string containing a comma/space-separated list into a - * dlist. - * - * @param string - * a comma/space-separated list - * - * @param list - * An exisiting dlist to append to, or NULL to create a - * new list. - * - * @return The head node of the dlist_t, whether it was newly - * created or passed in. On memory allocation error, - * errno will be set and processing will stop. - */ -static int -parse_tokenized_list( - const char *string, - dlist_t **list) -{ - char *stringdup; - char *device; - char *dup; - dlist_t *item; - int error = 0; - - /* Don't let strtok alter original argument */ - if ((stringdup = strdup(string)) == NULL) { - error = ENOMEM; - } else { - - /* For each device in the string list... */ - while ((device = strtok(stringdup, DEVICELISTDELIM)) != NULL) { - - /* Duplicate the device string */ - if ((dup = strdup(device)) == NULL) { - error = ENOMEM; - break; - } - - /* Create new dlist_t for this device */ - if ((item = dlist_new_item((void *)dup)) == NULL) { - error = ENOMEM; - free(dup); - break; - } - - /* Append item to list */ - *list = dlist_append(item, *list, B_TRUE); - - /* strtok needs NULL pointer on subsequent calls */ - stringdup = NULL; - } - - free(stringdup); - } - - return (error); -} - -/* - * Parses the given verbosity level argument string. - * - * @param arg - * A string representation of a verbosity level - * - * @param verbosity - * RETURN: the verbosity level - * - * @return 0 if the given verbosity level string cannot - * be interpreted, non-zero otherwise - */ -static int -parse_verbose_arg( - char *arg, - int *verbosity) -{ - int level; - - /* Scan for int */ - if (sscanf(arg, "%d", &level) == 1) { - - /* Argument was an integer */ - switch (level) { - case OUTPUT_QUIET: - case OUTPUT_TERSE: - case OUTPUT_VERBOSE: -#ifdef DEBUG - case OUTPUT_DEBUG: -#endif - - *verbosity = level; - return (0); - } - } - - volume_set_error(gettext("%s: invalid verbosity level"), arg); - return (-1); -} - -/* - * Print the help message for the command. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_help_create( - FILE *stream) -{ - print_usage_create(stream); - - /* BEGIN CSTYLED */ - fprintf(stream, gettext("\ -\n\ -Create Solaris Volume Manager volumes.\n\ -\n\ --F \n\ - Specify the volume request or volume configuration file to\n\ - process.\n\ -\n\ --s \n\ - Specify the disk set to use when creating volumes.\n\ -\n\ --S \n\ - Specify the size of the volume to be created.\n\ -\n\ --a \n\ - Explicitly specify the devices that can be used in the\n\ - creation of this volume.\n\ -\n\ --c Output the command script that would implement the specified or\n\ - generated volume configuration.\n\ -\n\ --d Output the volume configuration that satisfies the specified or\n\ - generated volume request.\n\ -\n\ --f Specify whether the volume should support automatic component\n\ - replacement after a fault.\n\ -\n\ --n \n\ - Specify the name of the new volume.\n\ -\n\ --p \n\ - Specify the number of required paths to the storage volume.\n\ -\n\ --r \n\ - Specify the redundancy level (0-4) of the data.\n\ -\n\ --u \n\ - Explicitly specify devices to exclude in the creation of this\n\ - volume.\n\ -\n\ --v \n\ - Specify the level of verbosity.\n\ -\n\ --V Display program version information.\n\ -\n\ --? Display help information.\n")); - - /* END CSTYLED */ - - print_manual_reference(stream); -} - -/* - * Print the help message for the command. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_help_main( - FILE *stream) -{ - print_usage_main(stream); - - /* BEGIN CSTYLED */ - fprintf(stream, gettext("\ -\n\ -Provide assistance, through automation, with common Solaris Volume\n\ -Manager tasks.\n\ -\n\ --V Display program version information.\n\ -\n\ --? Display help information. This option can follow \n\ - for subcommand-specific help.\n\ -\n\ -The accepted values for are:\n\ -\n\ -create Create Solaris Volume Manager volumes.\n")); - /* END CSTYLED */ - - print_manual_reference(stream); -} - -/* - * Print the help postscript for the command. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_manual_reference( - FILE *stream) -{ - fprintf(stream, gettext("\nFor more information, see %s(1M).\n"), - progname); -} - -/* - * Print the program usage to the given file stream. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_usage( - FILE *stream) -{ - switch (subcmd) { - case SUBCMD_CREATE: - print_usage_create(stream); - break; - - case SUBCMD_NONE: - default: - print_usage_main(stream); - } -} - -/* - * Print the program usage to the given file stream. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_usage_create( - FILE *stream) -{ - /* Create a blank the length of progname */ - char *blank = strdup(progname); - memset(blank, ' ', strlen(blank) * sizeof (char)); - - /* BEGIN CSTYLED */ - fprintf(stream, gettext("\ -Usage: %1$s create [-v ] [-c] -F \n\ - %1$s create [-v ] [-c|-d] -F \n\ - %1$s create [-v ] [-c|-d]\n\ - %2$s [-f] [-n ] [-p ] [-r ]\n\ - %2$s [-a [,,...]]\n\ - %2$s [-u [,,...]]\n\ - %2$s -s -S \n\ - %1$s create -V\n\ - %1$s create -?\n"), progname, blank); - /* END CSTYLED */ - - free(blank); -} - -/* - * Print the program usage to the given file stream. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static void -print_usage_main( - FILE *stream) -{ - /* BEGIN CSTYLED */ - fprintf(stream, gettext("\ -Usage: %1$s [-?] [options]\n\ - %1$s -V\n\ - %1$s -?\n"), progname); - /* END CSTYLED */ -} - -/* - * Print the program version to the given file stream. - * - * @param stream - * stdout or stderr, as appropriate. - */ -static int -print_version( - FILE *stream) -{ - int error = 0; - struct utsname uname_info; - - if (uname(&uname_info) < 0) { - error = -1; - volume_set_error(gettext("could not determine version")); - } else { - fprintf(stream, gettext("%s %s"), progname, uname_info.version); - } - - fprintf(stream, "\n"); - - return (error); -} - -/* - * Get an xmlDocPtr by parsing the given file. - * - * @param file - * The file to read - * - * @param valid_types - * An array of the allowable root elements. If the root - * element of the parsed XML file is not in this list, an - * error is returned. - * - * @param doc - * RETURN: the XML document - * - * @param root - * RETURN: the root element of the document - * - * @return 0 if the given XML file was successfully parsed, - * non-zero otherwise - */ -static int -get_doc_from_file( - char *file, - char **valid_types, - xmlDocPtr *doc, - char **root) -{ - int error = 0; - - *root = NULL; - - /* - * Create XML doc by reading the specified file using the - * default SAX handler (which has been modified in init_xml()) - */ - *doc = xmlSAXParseFile((xmlSAXHandlerPtr) - &xmlDefaultSAXHandler, file, 0); - - if (*doc != NULL) { - int i; - xmlNodePtr root_elem = xmlDocGetRootElement(*doc); - - /* Is this a valid root element? */ - for (i = 0; valid_types[i] != NULL; i++) { - if (xmlStrcmp(root_elem->name, - (const xmlChar *)valid_types[i]) == 0) { - *root = valid_types[i]; - } - } - - /* Was a valid root element found? */ - if (*root == NULL) { - xmlFreeDoc(*doc); - } - } - - /* Was a valid root element found? */ - if (*root == NULL) { - volume_set_error( - gettext("%s: invalid or malformed XML file"), file); - error = -1; - } - - return (error); -} - -/* - * Creates a volume-request or volume-config XML document, based on the - * arguments passed into the command. - * - * @param doc - * RETURN: the XML document, or NULL if no valid document - * could be created. - * - * @param root - * RETURN: the root element of the document - * - * @return 0 if a volume-request or volume-config XML document - * could be read or created, non-zero otherwise - */ -static int -get_volume_request_or_config( - xmlDocPtr *doc, - char **root) -{ - int error = 0; - - if (arg_inputfile == NULL) { - /* Create a volume-request based on quality of service */ - *doc = create_volume_request_XML(); - - if (*doc == NULL) { - volume_set_error(gettext("error creating volume request")); - error = -1; - *root = NULL; - } else { - *root = ELEMENT_VOLUMEREQUEST; - } - } else { - char *valid[] = { - ELEMENT_VOLUMEREQUEST, - ELEMENT_VOLUMECONFIG, - NULL - }; - - error = get_doc_from_file(arg_inputfile, valid, doc, root); - } - - return (error); -} - -/* - * Handle processing of the given meta* commands. Commands are - * written to a file, the file is optionally executed, and optionally - * deleted. - * - * @param commands - * The commands to write to the command script file. - * - * @return 0 on success, non-zero otherwise. - */ -static int -handle_commands( - char *commands) -{ - int error = 0; - - if (action & ACTION_OUTPUT_COMMANDS) { - printf("%s", commands); - } - - if (action & ACTION_EXECUTE) { - - /* Write a temporary file with 744 permissions */ - if ((error = write_temp_file(commands, - S_IRWXU | S_IRGRP | S_IROTH, &commandfile)) == 0) { - - char *command; - - /* Create command line to execute */ - if (get_max_verbosity() >= OUTPUT_VERBOSE) { - /* Verbose */ - command = stralloccat(3, - commandfile, " ", COMMAND_VERBOSE_FLAG); - } else { - /* Terse */ - command = strdup(commandfile); - } - - if (command == NULL) { - volume_set_error(gettext("could not allocate memory")); - error = -1; - } else { - - oprintf(OUTPUT_VERBOSE, - gettext("Executing command script: %s\n"), command); - - /* Execute command */ - switch (error = system(command)) { - /* system() failed */ - case -1: - error = errno; - break; - - /* Command succeded */ - case 0: - break; - - /* Command failed */ - default: - volume_set_error( - /* CSTYLED */ - gettext("execution of command script failed with status %d"), - WEXITSTATUS(error)); - error = -1; - } - free(command); - } - } - } - - return (error); -} - -/* - * Handle processing of the given volume-config devconfig_t. The - * devconfig_t is first converted to XML. Then, depending - * on user input to the command, the XML is either written to a file - * or converted to a command script and passed on to - * handle_commands(). - * - * @param config - * A devconfig_t representing a valid volume-config. - * - * @return 0 on success, non-zero otherwise. - */ -static int -handle_config( - devconfig_t *config) -{ - int error; - xmlDocPtr doc; - - /* Get the xml document for the config */ - if ((error = config_to_xml(config, &doc)) == 0) { - - /* Get the text dump */ - xmlChar *text; - xmlDocDumpFormatMemory(doc, &text, NULL, 1); - - /* Should we output the config file? */ - if (action & ACTION_OUTPUT_CONFIG) { - printf("%s", text); - } else { - oprintf(OUTPUT_DEBUG, - gettext("Generated volume-config:\n%s"), text); - } - - xmlFree(text); - - /* Proceed to command generation? */ - if (action & ACTION_OUTPUT_COMMANDS || - action & ACTION_EXECUTE) { - char *commands; - - /* Get command script from the file */ - if ((error = xml_to_commands(doc, &commands)) == 0) { - if (commands == NULL) { - volume_set_error( - gettext("could not convert XML to commands")); - error = -1; - } else { - error = handle_commands(commands); - free(commands); - } - } - } - - xmlFreeDoc(doc); - } - - return (error); -} - -/* - * Handle processing of the given volume-request request_t and - * volume-defaults defaults_t. A layout is generated from these - * structures and the resulting volume-config devconfig_t is passed on - * to handle_config(). - * - * @param request - * A request_t representing a valid volume-request. - * - * @param defaults - * A defaults_t representing a valid volume-defaults. - * - * @return 0 on success, non-zero otherwise. - */ -static int -handle_request( - request_t *request, - defaults_t *defaults) -{ - int error; - - /* Get layout for given request and system defaults */ - if ((error = get_layout(request, defaults)) == 0) { - - /* Retrieve resulting volume config */ - devconfig_t *config = request_get_diskset_config(request); - - if (config != NULL) { - error = handle_config(config); - } - } - - return (error); -} - -/* - * Write the given text to a temporary file with the given - * permissions. If the file already exists, return an error. - * - * @param text - * The text to write to the file. - * - * @param mode - * The permissions to give the file, passed to chmod(2). - * - * @param file - * RETURN: The name of the file written. Must be - * free()d. - * - * @return 0 on success, non-zero otherwise. - */ -static int -write_temp_file( - char *text, - mode_t mode, - char **file) -{ - int error = 0; - - /* - * Create temporary file name -- "XXXXXX" is replaced with - * unique char sequence by mkstemp() - */ - *file = stralloccat(3, "/tmp/", progname, "XXXXXX"); - - if (*file == NULL) { - volume_set_error(gettext("out of memory")); - error = -1; - } else { - int fildes; - FILE *out = NULL; - - /* Open temp file */ - if ((fildes = mkstemp(*file)) != -1) { - out = fdopen(fildes, "w"); - } - - if (out == NULL) { - volume_set_error(gettext( - "could not open file for writing: %s"), *file); - error = -1; - } else { - - fprintf(out, "%s", text); - fclose(out); - - if (mode != 0) { - if (chmod(*file, mode)) { - volume_set_error( - gettext("could not change permissions of file: %s"), - *file); - error = -1; - } - } - - /* Remove file on error */ - if (error != 0) { - unlink(*file); - } - } - - /* Free *file on error */ - if (error != 0) { - free(*file); - *file = NULL; - } - } - - return (error); -} - -/* - * Main entry to metassist. See the print_usage_* functions* for - * usage. - * - * @return 0 on successful exit, non-zero otherwise - */ -int -main( - int argc, - char *argv[]) -{ - int error = 0; - int printusage = 0; - -#ifdef DEBUG - time_t start = time(NULL); -#endif - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - /* Set program name, strip directory */ - if ((progname = strrchr(argv[0], '/')) != NULL) { - progname++; - } else { - progname = argv[0]; - } - - /* Set up signal handlers to exit gracefully */ - { - struct sigaction act; - act.sa_handler = interrupthandler; - sigemptyset(&act.sa_mask); - act.sa_flags = 0; - sigaction(SIGHUP, &act, (struct sigaction *)0); - sigaction(SIGINT, &act, (struct sigaction *)0); - sigaction(SIGQUIT, &act, (struct sigaction *)0); - sigaction(SIGTERM, &act, (struct sigaction *)0); - } - - /* Set default verbosity level */ - set_max_verbosity(OUTPUT_TERSE, stderr); - - /* Verify we're running as root */ - if (geteuid() != 0) { - volume_set_error(gettext("must be run as root")); - error = -1; - } else { - - /* Disable error messages from getopt */ - opterr = 0; - - /* Parse command-line options */ - if ((error = parse_opts(argc, argv)) == 0) { - xmlDocPtr doc; - char *root; - - /* Initialize XML defaults */ - init_xml(); - - /* Read volume-request/config file */ - if ((error = get_volume_request_or_config(&doc, &root)) == 0) { - - /* Is this a volume-config? */ - if (strcmp(root, ELEMENT_VOLUMECONFIG) == 0) { - - /* Was the -d flag specified? */ - if (action & ACTION_OUTPUT_CONFIG) { - /* -d cannot be used with -F */ - volume_set_error(gettext( - "-%c incompatible with -%c "), - CREATE_SHORTOPT_CONFIGFILE, - CREATE_SHORTOPT_INPUTFILE); - error = -1; - printusage = 1; - } else { - devconfig_t *config; - if ((error = xml_to_config(doc, &config)) == 0) { - error = handle_config(config); - free_devconfig(config); - } - } - } else - - /* Is this a volume-request? */ - if (strcmp(root, ELEMENT_VOLUMEREQUEST) == 0) { - request_t *request; - - if ((error = xml_to_request(doc, &request)) == 0) { - - xmlDocPtr defaults_doc; - char *valid[] = { - ELEMENT_VOLUMEDEFAULTS, - NULL - }; - - /* Read defaults file */ - if ((error = get_doc_from_file(VOLUME_DEFAULTS_LOC, - valid, &defaults_doc, &root)) == 0) { - - defaults_t *defaults; - - oprintf(OUTPUT_DEBUG, - gettext("Using defaults file: %s\n"), - VOLUME_DEFAULTS_LOC); - - /* Parse defaults XML */ - if ((error = xml_to_defaults( - defaults_doc, &defaults)) == 0) { - error = handle_request(request, defaults); - free_defaults(defaults); - } - - xmlFreeDoc(defaults_doc); - } - - free_request(request); - } - } - - xmlFreeDoc(doc); - } - } else { - printusage = 1; - } - } - - /* Handle any errors that were propogated */ - if (error != 0) { - char *message = get_error_string(error); - - if (message != NULL && strlen(message)) { - fprintf(stderr, "%s: %s\n", progname, message); - - if (printusage) { - fprintf(stderr, "\n"); - } - } - - if (printusage) { - print_usage(stderr); - } - } - -#ifdef DEBUG - /* Print run report to stderr if METASSIST_DEBUG is set */ - if (getenv(METASSIST_DEBUG_ENV) != NULL) { - time_t end = time(NULL); - struct tm *time; - int i; -#define TIMEFMT "%8s: %.2d:%.2d:%.2d\n" - - fprintf(stderr, " Command:"); - for (i = 0; i < argc; i++) { - fprintf(stderr, " %s", argv[i]); - } - fprintf(stderr, "\n"); - - fprintf(stderr, " Version: "); - print_version(stderr); - - time = localtime(&start); - fprintf(stderr, TIMEFMT, "Start", - time->tm_hour, time->tm_min, time->tm_sec); - - time = localtime(&end); - fprintf(stderr, TIMEFMT, "End", - time->tm_hour, time->tm_min, time->tm_sec); - - end -= start; - time = gmtime(&end); - fprintf(stderr, TIMEFMT, "Duration", - time->tm_hour, time->tm_min, time->tm_sec); - } -#endif - - clean_up(); - - return (error != 0); -} diff --git a/usr/src/cmd/lvm/metassist/controller/metassist.h b/usr/src/cmd/lvm/metassist/controller/metassist.h deleted file mode 100644 index d107c093bb62..000000000000 --- a/usr/src/cmd/lvm/metassist/controller/metassist.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _METASSIST_H -#define _METASSIST_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Location of the volume-defaults.xml file */ -#define VOLUME_DEFAULTS_LOC "/etc/default/metassist.xml" - -/* Available/unavailable device list delimiters */ -#define DEVICELISTDELIM ", " - -/* Command-line arguments */ -#define COMMON_SHORTOPT_HELP '?' -#define COMMON_SHORTOPT_VERBOSITY 'v' -#define COMMON_SHORTOPT_VERSION 'V' -#define CREATE_SHORTOPT_AVAILABLE 'a' -#define CREATE_SHORTOPT_COMMANDFILE 'c' -#define CREATE_SHORTOPT_DATAPATHS 'p' -#define CREATE_SHORTOPT_DISKSET 's' -#define CREATE_SHORTOPT_FAULTRECOVERY 'f' -#define CREATE_SHORTOPT_INPUTFILE 'F' -#define CREATE_SHORTOPT_NAME 'n' -#define CREATE_SHORTOPT_REDUNDANCY 'r' -#define CREATE_SHORTOPT_SIZE 'S' -#define CREATE_SHORTOPT_CONFIGFILE 'd' -#define CREATE_SHORTOPT_UNAVAILABLE 'u' -#define CREATE_SHORTOPTS "a:cdfF:n:p:r:s:S:u:v:V?" -#define MAIN_SHORTOPTS "-v:V?" -#define MAIN_SUBCMD_CREATE "create" - -#define SUBCMD_NONE 0 -#define SUBCMD_CREATE 1 - -/* Command action masks */ -#define ACTION_EXECUTE 1 -#define ACTION_OUTPUT_CONFIG 2 -#define ACTION_OUTPUT_COMMANDS 4 - -/* Verbose flag sent to generated shell script */ -#define COMMAND_VERBOSE_FLAG "-v" - -/* The name used to invoke the command */ -extern char *progname; - -#ifdef __cplusplus -} -#endif - -#endif /* _METASSIST_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/Makefile b/usr/src/cmd/lvm/metassist/layout/Makefile deleted file mode 100644 index 58e3fb164c39..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/Makefile +++ /dev/null @@ -1,69 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -METASSIST_TOPLEVEL = .. - -SRCS= \ - layout.c \ - layout_concat.c \ - layout_device_cache.c \ - layout_device_util.c \ - layout_discovery.c \ - layout_dlist_util.c \ - layout_hsp.c \ - layout_messages.c \ - layout_mirror.c \ - layout_request.c \ - layout_slice.c \ - layout_stripe.c \ - layout_svm_util.c \ - layout_validate.c - -OBJS = $(SRCS:%.c=%.o) -HDRS = $(SRCS:%.c=%.h) -MSGFILES = $(SRCS:%.c=%.i) - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -INCLUDES += -I../common -I../controller -I../../../../lib/libdiskmgt/common \ - -I../../../../lib/lvm/libmeta/common/hdrs -CFLAGS += $(INCLUDES) - -POFILE = layoutp.po - -include $(METASSIST_TOPLEVEL)/Makefile.targ - -# Build .po file from message files -$(POFILE): $(MSGFILES) - $(BUILDPO.msgfiles) - -cstyle: - $(CSTYLE) $(CSTYLE_FLAGS) $(SRCS) $(HDRS) - -hdrchk: - $(HDRCHK) $(HDRCHK_FLAGS) $(HDRS) diff --git a/usr/src/cmd/lvm/metassist/layout/layout.c b/usr/src/cmd/lvm/metassist/layout/layout.c deleted file mode 100644 index 0e6ad4596ac2..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout.c +++ /dev/null @@ -1,1143 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include - -#include "volume_error.h" -#include "volume_defaults.h" -#include "volume_dlist.h" -#include "volume_output.h" -#include "volume_request.h" - -#include "layout.h" -#include "layout_request.h" - -#include "layout_concat.h" -#include "layout_discovery.h" -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_dlist_util.h" -#include "layout_hsp.h" -#include "layout_mirror.h" -#include "layout_slice.h" -#include "layout_stripe.h" -#include "layout_svm_util.h" -#include "layout_validate.h" - -#define _LAYOUT_C - -static int layout_init(devconfig_t *request, defaults_t *defaults); -static int layout_diskset(request_t *request, dlist_t *results); - -static int process_request(devconfig_t *request, dlist_t **results); -static int process_qos_request(devconfig_t *request, dlist_t **results); -static int process_hsp_request(devconfig_t *request, dlist_t **results); - -/* - * stuff for making/updating the HSP to service devices - * created by the toplevel request - */ -static devconfig_t *_hsp_request = NULL; -static dlist_t *_hsp_devices = NULL; -static void set_hsp_request(devconfig_t *request); -static void unset_hsp_request(); - -/* - * struct to track which disks have been explicitly modified - * during the layout process... - * - * disk is the dm_descriptor_t of the modified disk - * accessname is the name to access the disk thru - * slices is the list of modified slices on the disk - */ -typedef struct { - dm_descriptor_t disk; - char *accessname; - dlist_t *slices; -} moddisk_t; - -/* - * modified_disks is a list of moddisk_t structs - * tracking disks have been modified during layout. - */ -static dlist_t *_modified_disks = NULL; - -static int collect_modified_disks(devconfig_t *request, dlist_t *results); -static int add_modified_disks_to_diskset( - dlist_t *devices, - devconfig_t *diskset); -static int release_modified_disks(); -static int get_removed_slices_for_disks( - dlist_t *mod_disks); -static int get_modified_slices_for_disks( - dlist_t *moddisks); -static int compare_disk_to_moddisk_disk( - void *disk, - void *moddisk); - -static int convert_device_names(devconfig_t *request, dlist_t *devs); - -/* - * FUNCTION: get_layout(devconfig_t *request, defaults_t *defaults) - * - * INPUT: request - a devconfig_t pointer to the toplevel request - * defaults - a results_t pointer to the defaults - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: Public entry point to layout module. - */ -int -get_layout( - request_t *request, - defaults_t *defaults) -{ - devconfig_t *diskset_req = NULL; - dlist_t *iter = NULL; - dlist_t *results = NULL; - int error = 0; - - if ((diskset_req = request_get_diskset_req(request)) != NULL) { - - /* initialize using the the top-level disk set request... */ - if ((error = layout_init(diskset_req, defaults)) != 0) { - return (error); - } - - oprintf(OUTPUT_TERSE, - gettext("\nProcessing volume request...\n")); - - iter = devconfig_get_components(diskset_req); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - - /* process each volume request, stop on any error */ - devconfig_t *subreq = (devconfig_t *)iter->obj; - dlist_t *subres = NULL; - - ((error = process_request(subreq, &subres)) != 0) || - (error = collect_modified_disks(subreq, subres)) || - (error = convert_device_names(subreq, subres)); - if (error == 0) { - results = dlist_append(subres, results, AT_TAIL); - } - } - - if (error == 0) { - /* process HSP request */ - dlist_t *subres = NULL; - error = process_hsp_request(diskset_req, &subres); - if (error == 0) { - results = dlist_append(subres, results, AT_TAIL); - } - } - - if (error == 0) { - oprintf(OUTPUT_TERSE, - gettext("\nAssembling volume specification...\n")); - /* determine required diskset modifications */ - error = layout_diskset(request, results); - } - - layout_clean_up(); - - if (error == 0) { - oprintf(OUTPUT_TERSE, - gettext("\nVolume request completed successfully.\n")); - } - - } else { - volume_set_error( - gettext("Malformed request, missing top level " - "disk set request.")); - } - - return (error); -} - -/* - * FUNCTION: layout_clean_up() - * - * PURPOSE: function which handles the details of cleaning up cached - * data and any other memory allocated during the layout - * process. - * - * release physical device data structs - * release SVM logical device data structs - * release validation data structs - * release modified device data structs - * release request processing data structs - * - * This function is also exported as part of the public - * interface to the layout module, clients of layout - * are required to call this function if get_layout() - * was called and was not allowed to return. For example, - * if SIGINT was received while a layout request was in - * process. - */ -void -layout_clean_up() -{ - (void) release_request_caches(); - (void) release_validation_caches(); - - (void) release_slices_to_remove(); - (void) release_modified_slices(); - (void) release_modified_disks(); - - (void) release_reserved_slices(); - (void) release_used_slices(); - - (void) release_usable_devices(); - (void) release_svm_names(get_request_diskset()); - (void) release_known_devices(); - - (void) unset_hsp_request(NULL); - (void) unset_request_defaults(NULL); - (void) unset_request_diskset(NULL); - (void) unset_toplevel_request(NULL); -} - -/* - * FUNCTION: layout_init(devconfig_t *diskset, defaults_t *defaults) - * - * INPUT: diskset - a devconfig_t pointer to the toplevel request - * defaults - a results_t pointer to the defaults - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: function which handles the details of initializing the layout - * module prior to processing a request. - * - * Determines the requested disk set and validates it. - * - * Scans the physical device configuration. - * Scans the SVM logical device configuration. - * - * Initializes layout private global data structures and does - * semantic validation of the request. - */ -static int -layout_init( - devconfig_t *diskset, - defaults_t *defaults) -{ - dlist_t *iter = NULL; - int error = 0; - char *dsname = NULL; - - ((error = validate_basic_svm_config()) != 0) || - - /* determine & validate requested disk set name */ - (error = devconfig_get_name(diskset, &dsname)) || - (error = set_request_diskset(dsname)) || - - /* discover known physical and logical devices */ - (error = discover_known_devices()) || - (error = scan_svm_names(dsname)) || - - /* validate and remember toplevel request */ - (error = set_toplevel_request(diskset)) || - - /* validate and remember defaults for this request */ - (error = set_request_defaults(defaults)); - - if (error != 0) { - return (error); - } - - oprintf(OUTPUT_TERSE, - gettext("\nValidating volume request...\n")); - - iter = devconfig_get_components(diskset); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - devconfig_t *subreq = (devconfig_t *)iter->obj; - error = validate_request(subreq); - } - - if (error == 0) { - error = discover_usable_devices(dsname); - } - - if (error == 0) { - /* final validation on explicitly requested components */ - error = validate_reserved_slices(); - } - - if (error == 0) { - /* final validation on request sizes vs. actual avail space */ - error = validate_request_sizes(diskset); - } - - return (error); -} - -/* - * FUNCTION: process_request(devconfig_t *req, dlist_t **results) - * - * INPUT: req - a devconfig_t pointer to the current request - * results - pointer to a list of resulting volumes - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: function which handles the details of an explicit - * volume request. - * - * Determines the requested volume type, whether the - * request contains specific subcomponents and dispatches - * to the appropriate layout function for that type. - * - * Resulting volumes are appended to the results list. - * - * Note that an HSP request is held until all the volumes - * in the request have been successfully composed. This - * ensures that HSP spare sizing can be appropriate to - * those volumes. - */ -static int -process_request( - devconfig_t *req, - dlist_t **results) -{ - component_type_t type = TYPE_UNKNOWN; - uint64_t nbytes = 0; /* requested volume size */ - dlist_t *comps = NULL; - int ncomps = 0; - int error = 0; - - (void) devconfig_get_type(req, &type); - (void) devconfig_get_size(req, &nbytes); - comps = devconfig_get_components(req); - - if (type == TYPE_HSP) { - /* HSP processing needs to happen after all other volumes. */ - /* set the HSP request aside until all other requests have */ - /* been completed successfully */ - set_hsp_request(req); - return (0); - } - - oprintf(OUTPUT_TERSE, "\n"); - oprintf(OUTPUT_VERBOSE, "******************\n"); - - ncomps = dlist_length(comps); - - if (type == TYPE_STRIPE) { - if (ncomps > 0) { - return (populate_explicit_stripe(req, results)); - } else { - return (layout_stripe(req, nbytes, results)); - } - } - - if (type == TYPE_CONCAT) { - if (ncomps > 0) { - return (populate_explicit_concat(req, results)); - } else { - return (layout_concat(req, nbytes, results)); - } - } - - if (type == TYPE_MIRROR) { - if (ncomps > 0) { - return (populate_explicit_mirror(req, results)); - } else { - uint16_t nsubs = 0; - if ((error = get_mirror_nsubs(req, &nsubs)) != 0) { - return (error); - } else { - return (layout_mirror(req, nsubs, nbytes, results)); - } - } - } - - if (type == TYPE_VOLUME) { - error = process_qos_request(req, results); - } - - return (error); -} - -/* - * FUNCTION: process_qos_request(devconfig_t *req, dlist_t **results) - * - * INPUT: req - a devconfig_t pointer to the current request - * results - pointer to a list of resulting volumes - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: function which handles the details of mapping an implicit - * volume request of QoS attributes into a volume type. - * - * Resulting volumes are appended to the results list. - */ -static int -process_qos_request( - devconfig_t *req, - dlist_t **results) -{ - int error = 0; - - uint64_t nbytes = 0; - uint16_t rlevel = 0; - - /* get QoS attributes */ - (void) devconfig_get_size(req, &nbytes); - - if ((error = get_volume_redundancy_level(req, &rlevel)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - rlevel = 0; - } - } - - if (error == 0) { - if (rlevel == 0) { - error = layout_stripe(req, nbytes, results); - } else { - error = layout_mirror(req, rlevel, nbytes, results); - } - } - - return (error); -} - -/* - * FUNCTION: layout_diskset(request_t *req, dlist_t **results) - * - * INPUT: req - a request_t pointer to the toplevel request - * results - pointer to the list of composed result volumes - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: function which handles the details of completing an layout - * request. - * - * Determines if the disk set specified in the request currently - * exists and sets it up for creation if it doesn't. - * - * Adds new disks required by the result volumes to the disk set. - * - * Attaches the result volumes to the disk set result. - * - * Convert slice and disk names to preferred names. - * - * Attaches the disk set result to the toplevel request. - */ -static int -layout_diskset( - request_t *request, - dlist_t *results) -{ - int error = 0; - devconfig_t *diskset = NULL; - dlist_t *comps = NULL; - - ((error = new_devconfig(&diskset, TYPE_DISKSET)) != 0) || - (error = devconfig_set_name(diskset, get_request_diskset())) || - (error = add_modified_disks_to_diskset(results, diskset)); - if (error != 0) { - free_devconfig(diskset); - return (error); - } - - /* add resulting volumes */ - if (results != NULL) { - comps = devconfig_get_components(diskset); - comps = dlist_append(results, comps, AT_TAIL); - devconfig_set_components(diskset, comps); - } - - request_set_diskset_config(request, diskset); - - return (error); -} - -/* - * FUNCTION: convert_device_names(devconfig_t request, dlist_t *devices) - * - * INPUT: request - a devconfig_t request pointer - * devices - a list of devconfig_t devices - * - * RETURNS: int - 0 - on success - * !0 - on any error - * - * PURPOSE: Utility function to convert any slice or disk drive - * names in a result devconfig_t to the preferred name - * which should be used to access the device. - * - * This convert the temporary names used by layout to - * the proper DID or /dev/dsk alias. - */ -static int -convert_device_names( - devconfig_t *request, - dlist_t *devices) -{ - int error = 0; - dlist_t *iter; - - for (iter = devices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - component_type_t type = TYPE_UNKNOWN; - dm_descriptor_t disk = (dm_descriptor_t)0; - char *devname = NULL; - char *diskname = NULL; - char *slicename = NULL; - uint16_t index; - - if ((error = devconfig_get_type(dev, &type)) == 0) { - switch (type) { - - case TYPE_MIRROR: - case TYPE_STRIPE: - case TYPE_CONCAT: - case TYPE_HSP: - - error = convert_device_names(request, - devconfig_get_components(dev)); - - break; - - case TYPE_SLICE: - - ((error = devconfig_get_name(dev, &devname)) != 0) || - (error = devconfig_get_slice_index(dev, &index)) || - (error = get_disk_for_named_slice(devname, &disk)) || - (error = get_device_access_name(request, disk, - &diskname)) || - (error = make_slicename_for_diskname_and_index( - diskname, index, &slicename)); - - if ((error == 0) && (slicename != NULL)) { - error = devconfig_set_name(dev, slicename); - free(slicename); - } - - break; - } - } - } - - return (error); -} - -/* - * FUNCTION: add_modified_disk(devconfig_t request, dm_descriptor_t disk); - * - * INPUT: request - a pointr to a devconfig_t request - * disk - dm_descriptor_t handle for a disk that has been modified - * - * SIDEEFFECTS: adds an entry to the _modified_disks list which tracks - * the disks that have been explicitly modified by - * the layout code. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Adds the input disk to the list of those that have been - * modified. - * - * Disks are modified during layout for two reasons: - * - * 1. any disk that is to be added to the disk set gets - * an explicitly updated label. - * - * 2. once a disk is in the disk set, existing slices - * may be resized or new slices can be added. - */ -int -add_modified_disk( - devconfig_t *request, - dm_descriptor_t disk) -{ - dlist_t *iter = NULL; - moddisk_t *moddisk = NULL; - dlist_t *item = NULL; - int error = 0; - - for (iter = _modified_disks; iter != NULL; iter = iter->next) { - moddisk = (moddisk_t *)iter->obj; - if (compare_descriptor_names( - (void *)(uintptr_t)moddisk->disk, - (void *)(uintptr_t)disk) == 0) { - /* already in list */ - return (0); - } - } - - moddisk = (moddisk_t *)calloc(1, sizeof (moddisk_t)); - if (moddisk == NULL) { - error = ENOMEM; - } else { - char *aname = NULL; - error = get_device_access_name(request, disk, &aname); - if (error == 0) { - - /* add to list of modified disks */ - moddisk->disk = disk; - moddisk->accessname = aname; - moddisk->slices = NULL; - - if ((item = dlist_new_item((void *)moddisk)) == NULL) { - free(moddisk); - error = ENOMEM; - } else { - _modified_disks = - dlist_append(item, _modified_disks, AT_HEAD); - } - } - } - - return (error); -} - -/* - * FUNCTION: collect_modified_disks(devconfig_t *request, dlist_t* devs) - * - * INPUT: devs - pointer to a list of composed volumes - * OUTPUT: none - - * SIDEEFFECT: updates the module global list _modified_disks - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper to maintain the list of disks to be added to the - * disk set. - * - * Iterates the input list of devices and determines which - * disks they use. If a disk is not in the _modified_disks - * list, it is added. - */ -static int -collect_modified_disks( - devconfig_t *request, - dlist_t *devs) -{ - int error = 0; - - char *sname = NULL; - dm_descriptor_t disk = (dm_descriptor_t)0; - - for (; (devs != NULL) && (error == 0); devs = devs->next) { - - devconfig_t *dev = (devconfig_t *)devs->obj; - component_type_t type = TYPE_UNKNOWN; - - if ((error = devconfig_get_type(dev, &type)) == 0) { - - switch (type) { - case TYPE_MIRROR: - case TYPE_STRIPE: - case TYPE_CONCAT: - case TYPE_HSP: - - error = collect_modified_disks(request, - devconfig_get_components(dev)); - break; - - case TYPE_SLICE: - - ((error = devconfig_get_name(dev, &sname)) != 0) || - (error = get_disk_for_named_slice(sname, &disk)) || - (error = add_modified_disk(request, disk)); - - break; - } - } - } - - return (error); -} - -/* - * FUNCTION: add_modified_disks_to_diskset(dlist_t *devices, - * devconfig_t *diskset) - * - * INPUT: devices - pointer to a list of devices - * - * OUTPUT: diskset - pointer to a devconfig_t representing the disk set, - * updated to include modified disks and slices as - * components. - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper to add devconfig_t structs for disks and slices - * to the disk set. - * - * Updates the list of _modified_disks by examining the input - * list of composed devices. - * - * Iterates _modified_disks and creates a devconfig_t component - * for each disk in the list, the list of disks is then attached - * to the input disk set. - * - * Modified slices for disks in the disk set are added as well. - */ -static int -add_modified_disks_to_diskset( - dlist_t *results, - devconfig_t *diskset) -{ - int error = 0; - - dlist_t *iter; - dlist_t *list = NULL; - char *dsname = get_request_diskset(); - - /* add modified disks to disk set's component list */ - list = devconfig_get_components(diskset); - - oprintf(OUTPUT_TERSE, - gettext(" Collecting modified disks...\n")); - - /* collect removed slices for modified disks */ - error = get_removed_slices_for_disks(_modified_disks); - - /* collect modified slices for modified disks */ - error = get_modified_slices_for_disks(_modified_disks); - - for (iter = _modified_disks; - (iter != NULL) && (error == 0); - iter = iter->next) { - - moddisk_t *moddisk = (moddisk_t *)iter->obj; - dm_descriptor_t disk = moddisk->disk; - devconfig_t *newdisk = NULL; - boolean_t in_set = B_FALSE; - - oprintf(OUTPUT_VERBOSE, " %s\n", moddisk->accessname); - - error = is_disk_in_diskset(disk, dsname, &in_set); - if ((error == 0) && (in_set != B_TRUE)) { - /* New disk, add it to the disk set */ - ((error = new_devconfig(&newdisk, TYPE_DRIVE)) != 0) || - (error = devconfig_set_name(newdisk, moddisk->accessname)); - if (error == 0) { - dlist_t *item = dlist_new_item(newdisk); - if (item == NULL) { - error = ENOMEM; - } else { - list = dlist_append(item, list, AT_TAIL); - oprintf(OUTPUT_DEBUG, - gettext(" must add %s to disk set \"%s\"\n"), - moddisk->accessname, dsname); - } - } else { - free_devconfig(newdisk); - } - } - - if ((error == 0) && (moddisk->slices != NULL)) { - /* move moddisk's slice list to disk set comp list */ - list = dlist_append(moddisk->slices, list, AT_TAIL); - moddisk->slices = NULL; - } - } - - if (error == 0) { - devconfig_set_components(diskset, list); - } else { - dlist_free_items(list, NULL); - } - - return (error); -} - -/* - * FUNCTIONS: void release_modified_disks() - * - * INPUT: none - - * OUTPUT: none - - * - * PURPOSE: cleanup the module global list of disks that need - * to be added to the disk set to satisfy the request. - */ -static int -release_modified_disks() -{ - dlist_t *iter = _modified_disks; - - for (; iter != NULL; iter = iter->next) { - moddisk_t *moddisk = (moddisk_t *)iter->obj; - if (moddisk->slices != NULL) { - dlist_free_items(moddisk->slices, free_devconfig); - moddisk->slices = NULL; - } - free(moddisk); - iter->obj = NULL; - } - - dlist_free_items(_modified_disks, NULL); - _modified_disks = NULL; - - return (0); -} - -/* - * FUNCTION: get_removed_slices_for_disks(dlist_t *mod_disks) - * - * INPUT: mod_disks - a list of moddisk_t structs - * - * OUTPUT: mod_disks - the list of moddisk_t structs updated with - * the slices to be removed for each disk - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper to create a list of devconfig_t structs - * for slices on the input disks which need to be - * removed from the system. - * - * Iterates the list of slices to be removed and - * creates a devconfig_t component for each slice - * in the list that is on any of the input modified - * disks. - * - * Slice names are constructed using the modified disk's - * access name to ensure that the correct alias is - * used to get to the slice. - */ -static int -get_removed_slices_for_disks( - dlist_t *mod_disks) -{ - int error = 0; - dlist_t *iter = NULL; - - /* collect slices to be removed for the modified disks */ - for (iter = get_slices_to_remove(); - (iter != NULL) && (error == 0); - iter = iter->next) { - - rmvdslice_t *rmvd = (rmvdslice_t *)iter->obj; - dm_descriptor_t disk = (dm_descriptor_t)0; - moddisk_t *moddisk = NULL; - char *sname = NULL; - devconfig_t *newslice = NULL; - dlist_t *item = NULL; - - (void) get_disk_for_named_slice(rmvd->slice_name, &disk); - - if ((item = dlist_find(mod_disks, (void *)(uintptr_t)disk, - compare_disk_to_moddisk_disk)) == NULL) { - /* slice on disk that we don't care about */ - continue; - } - - moddisk = (moddisk_t *)item->obj; - - /* create output slice struct for the removed slice */ - ((error = make_slicename_for_diskname_and_index( - moddisk->accessname, rmvd->slice_index, &sname)) != 0) || - (error = new_devconfig(&newslice, TYPE_SLICE)) || - (error = devconfig_set_name(newslice, sname)) || - (error = devconfig_set_size_in_blocks(newslice, 0)); - - /* add to the moddisk's list of slices */ - if (error == 0) { - if ((item = dlist_new_item(newslice)) == NULL) { - free_devconfig(newslice); - error = ENOMEM; - } else { - moddisk->slices = - dlist_append(item, moddisk->slices, AT_TAIL); - } - } else { - free_devconfig(newslice); - } - } - - return (error); -} - -/* - * FUNCTION: get_modified_slices_for_disks(dlist_t *mod_disks) - * - * INPUT: mod_disks - a list of moddisk_t structs - * - * OUTPUT: mod_disks - the list of moddisk_t structs updated with - * the modified slices for each disk - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper to create a list of devconfig_t structs - * for slices on the input disks which have been - * modified for use by layout. - * - * Iterates the list of modified slices and creates a - * devconfig_t component for each slice in the list - * that is on any of the input modified disks. - * - * Slice names are constructed using the modified disk's - * access name to ensure that the correct alias is - * used to get to the slice. - */ -int -get_modified_slices_for_disks( - dlist_t *mod_disks) -{ - int error = 0; - dlist_t *iter = NULL; - - for (iter = get_modified_slices(); - (iter != NULL) && (error == 0); - iter = iter->next) { - - modslice_t *mods = (modslice_t *)iter->obj; - devconfig_t *slice = mods->slice_devcfg; - devconfig_t *newslice = NULL; - dm_descriptor_t disk; - moddisk_t *moddisk; - dlist_t *item; - char *sname = NULL; - uint64_t stblk = 0; - uint64_t nblks = 0; - uint16_t index; - - /* only add modified slices that were sources */ - if ((mods->times_modified == 0) || - (mods->src_slice_desc != (dm_descriptor_t)0)) { - continue; - } - - (void) devconfig_get_name(slice, &sname); - (void) get_disk_for_named_slice(sname, &disk); - - if ((item = dlist_find(mod_disks, (void *)(uintptr_t)disk, - compare_disk_to_moddisk_disk)) == NULL) { - /* slice on disk that we don't care about */ - continue; - } - - moddisk = (moddisk_t *)item->obj; - - /* create output slice struct for the modified slice */ - ((error = devconfig_get_slice_start_block(slice, - &stblk)) != 0) || - (error = devconfig_get_size_in_blocks(slice, &nblks)) || - (error = devconfig_get_slice_index(slice, &index)) || - (error = make_slicename_for_diskname_and_index( - moddisk->accessname, index, &sname)) || - (error = new_devconfig(&newslice, TYPE_SLICE)) || - (error = devconfig_set_name(newslice, sname)) || - (error = devconfig_set_slice_start_block(newslice, stblk)) || - (error = devconfig_set_size_in_blocks(newslice, nblks)); - - /* add to the moddisk's list of slices */ - if (error == 0) { - if ((item = dlist_new_item(newslice)) == NULL) { - free_devconfig(newslice); - error = ENOMEM; - } else { - moddisk->slices = - dlist_append(item, moddisk->slices, AT_TAIL); - } - } else { - free_devconfig(newslice); - } - } - - return (error); -} - -/* - * FUNCTION: compare_disk_to_moddisk_disk(void *disk, void *moddisk) - * - * INPUT: disk - opaque pointer to a dm_descriptor_t - * moddisk - opaque moddisk_t pointer - * - * RETURNS: int - 0 - if disk == moddisk->disk - * !0 - otherwise - * - * PURPOSE: dlist_t helper which compares the input disk dm_descriptor_t - * handle to the disk dm_descriptor_t handle in the input - * moddisk_t struct. - * - * Comparison is done via compare_descriptor_names. - */ -static int -compare_disk_to_moddisk_disk( - void *disk, - void *moddisk) -{ - assert(disk != (dm_descriptor_t)0); - assert(moddisk != NULL); - - return (compare_descriptor_names((void *)disk, - (void *)(uintptr_t)((moddisk_t *)moddisk)->disk)); -} - -/* - * FUNCTIONS: void set_hsp_request() - * - * INPUT: none - - * OUTPUT: none - - * - * PURPOSE: set the module global HSP request struct. - */ -static void -set_hsp_request( - devconfig_t *req) -{ - _hsp_request = req; -} - -/* - * FUNCTIONS: void unset_hsp_request() - * - * INPUT: none - - * OUTPUT: none - - * - * PURPOSE: unset the module global HSP request struct. - */ -static void -unset_hsp_request() -{ - _hsp_request = NULL; -} - -/* - * FUNCTION: process_hsp_request(devconfig_t *req, dlist_t **results) - * INPUT: req - pointer to the toplevel disk set devconfig_t request - * results - pointer to a list of composed results - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper which determines HSP processing for the - * composed volumes which need HSP spares. - */ -static int -process_hsp_request( - devconfig_t *req, - dlist_t **results) -{ - int error = 0; - - if (_hsp_request != NULL) { - oprintf(OUTPUT_TERSE, - gettext("\nProcessing HSP...\n")); - } - - if (_hsp_devices == NULL) { - /* no devices -> no HSP */ - oprintf(OUTPUT_VERBOSE, - gettext(" No devices require hot spares...\n")); - } else { - - oprintf(OUTPUT_TERSE, "\n"); - - ((error = layout_hsp(req, _hsp_request, _hsp_devices, - results)) != 0) || - (error = collect_modified_disks(_hsp_request, *results)) || - (error = convert_device_names(_hsp_request, *results)); - } - - return (error); -} - -/* - * FUNCTION: add_to_hsp_list(dlist_t* list) - * INPUT: devs - pointer to a list of composed volumes - * OUTPUT: none - - * SIDEEFFECT: updates the module global list _hsp_devices - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper to update the list of devices which need HSP spares. - * - * Iterates the input list of devices and adds them them to the - * module provate list of devices needing spares. - */ -int -add_to_hsp_list( - dlist_t *list) -{ - dlist_t *iter = NULL; - int error = 0; - - for (iter = list; iter != NULL; iter = iter->next) { - dlist_t *item = NULL; - - if ((item = dlist_new_item(iter->obj)) == NULL) { - error = ENOMEM; - break; - } - _hsp_devices = dlist_append(item, _hsp_devices, AT_HEAD); - } - - return (error); -} - -/* - * FUNCTION: string_case_compare( - * char *str1, char *str2) - * - * INPUT: str1 - char * - * str2 - char * - * - * RETURNS: int - <0 - if str1 < str2 - * 0 - if str1 == str2 - * >0 - if str1 > str2 - * - * PURPOSE: More robust case independent string comparison function. - * - * Assumes str1 and str2 are both char * - * - * Compares the lengths of each and if equivalent compares - * the strings using strcasecmp. - */ -int -string_case_compare( - char *str1, - char *str2) -{ - int result = 0; - - assert(str1 != NULL); - assert(str2 != NULL); - - if ((result = (strlen(str1) - strlen(str2))) == 0) { - result = strcasecmp(str1, str2); - } - - return (result); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout.h b/usr/src/cmd/lvm/metassist/layout/layout.h deleted file mode 100644 index 3cb99f80aec1..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_LAYOUT_H -#define _VOLUME_LAYOUT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_request.h" -#include "volume_defaults.h" - -/* - * FUNCTION: get_layout(devconfig_t *request, defaults_t *defaults) - * - * INPUT: request - a devconfig_t pointer to the toplevel request - * defaults - a results_t pointer to the defaults - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: Public entry point to layout module. - */ -extern int get_layout(request_t *request, defaults_t *defaults); - -/* - * FUNCTION: layout_clean_up() - * INPUT: - * OUTPUT: - * SIDEEFFECTS: releases all memory allocated during layout processing - * - * PURPOSE: function which handles the details of cleaning up memory - * allocated while processing a request. - * - * This function must be called explicitly if a call to - * get_layout() was terminated abnormally, for example, - * if the user terminates the calling process with a SIGINT. - */ -extern void layout_clean_up(); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_LAYOUT_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_concat.c b/usr/src/cmd/lvm/metassist/layout/layout_concat.c deleted file mode 100644 index 3fe529763d34..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_concat.c +++ /dev/null @@ -1,673 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#include "libdiskmgt.h" - -#include "volume_error.h" -#include "volume_defaults.h" -#include "volume_devconfig.h" -#include "volume_dlist.h" -#include "volume_output.h" -#include "volume_request.h" - -#include "layout_concat.h" -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_messages.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_svm_util.h" - -#define _LAYOUT_CONCAT_C - -static int -compose_concat_within_hba( - devconfig_t *request, - dlist_t *hbas, - uint64_t nbytes, - devconfig_t **concat); - -static int -assemble_concat( - devconfig_t *request, - dlist_t *comps, - devconfig_t **concat); - -/* - * FUNCTION: layout_concat(devconfig_t *request, uint64_t nbytes, - * dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * nbytes - the desired capacity of the concat - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main layout driver for composing concat volumes. - * - * Attempts to construct a concat of size nbytes. - * - * Several different layout strategies are tried in order - * of preference until one succeeds or there are none left. - * - * 1 - concat within an HBA - * . requires sufficient space available on the HBA - * - * 2 - concat across all available similar HBAs - * - * 3 - concat across all available HBAs - * - * get available HBAs - * - * group HBAs by characteristics - * for (each HBA grouping) and (concat not composed) { - * select next HBA group - * for (strategy[1,2]) and (concat not composed) { - * compose concat using HBAs in group - * } - * } - * - * if (concat not composed) { - * for (strategy[3]) and (concat not composed) { - * compose concat using all HBAs - * } - * } - * - * if (concat composed) { - * append composed concat to results - * } - */ -int -layout_concat( - devconfig_t *request, - uint64_t nbytes, - dlist_t **results) -{ - /* - * these enums define the # of strategies and the preference order - * in which they are tried - */ - typedef enum { - CONCAT_WITHIN_SIMILAR_HBA = 0, - CONCAT_ACROSS_SIMILAR_HBAS, - N_SIMILAR_HBA_STRATEGIES - } similar_hba_strategy_order_t; - - typedef enum { - CONCAT_ACROSS_ANY_HBAS = 0, - N_ANY_HBA_STRATEGIES - } any_hba_strategy_order_t; - - dlist_t *usable_hbas = NULL; - dlist_t *similar_hba_groups = NULL; - dlist_t *iter = NULL; - devconfig_t *concat = NULL; - - int error = 0; - - (error = get_usable_hbas(&usable_hbas)); - if (error != 0) { - volume_set_error(gettext("There are no usable HBAs.")); - return (error); - } - - print_layout_volume_msg(devconfig_type_to_str(TYPE_CONCAT), nbytes); - - if (dlist_length(usable_hbas) == 0) { - print_no_hbas_msg(); - return (-1); - } - - error = group_similar_hbas(usable_hbas, &similar_hba_groups); - if (error != 0) { - return (error); - } - - for (iter = similar_hba_groups; - (error == 0) && (concat == NULL) && (iter != NULL); - iter = iter->next) { - - dlist_t *hbas = (dlist_t *)iter->obj; - - similar_hba_strategy_order_t order; - - for (order = CONCAT_WITHIN_SIMILAR_HBA; - (order < N_SIMILAR_HBA_STRATEGIES) && - (concat == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - - switch (order) { - - case CONCAT_WITHIN_SIMILAR_HBA: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 1: use disks from a single HBA - concat within HBA\n")); -/* END CSTYLED */ - - error = compose_concat_within_hba( - request, selhbas, nbytes, &concat); - } - - break; - - case CONCAT_ACROSS_SIMILAR_HBAS: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 2: use disks from all similar HBAs - concat across HBAs\n")); -/* END CSTYLED */ - - error = populate_concat( - request, nbytes, disks, - NULL, &concat); - } - - break; - - default: - break; - } - - dlist_free_items(disks, NULL); - dlist_free_items(selhbas, NULL); - } - } - - for (iter = similar_hba_groups; iter != NULL; iter = iter->next) { - dlist_free_items((dlist_t *)iter->obj, NULL); - } - dlist_free_items(similar_hba_groups, NULL); - - /* try all HBAs */ - if (concat == NULL && error == 0) { - - any_hba_strategy_order_t order; - - for (order = CONCAT_ACROSS_ANY_HBAS; - (order < N_ANY_HBA_STRATEGIES) && - (concat == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - - switch (order) { - - case CONCAT_ACROSS_ANY_HBAS: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_VERBOSE, - gettext(" -->Strategy 3: use disks from all available HBAs - concat across HBAs\n")); -/* END CSTYLED */ - - error = populate_concat( - request, nbytes, disks, - NULL, &concat); - } - - break; - - default: - break; - } - - dlist_free_items(disks, NULL); - dlist_free_items(selhbas, NULL); - } - } - - if (concat != NULL) { - - dlist_t *item = dlist_new_item(concat); - if (item == NULL) { - error = ENOMEM; - } else { - - *results = dlist_append(item, *results, AT_TAIL); - - print_layout_success_msg(); - } - - } else if (error != 0) { - - print_debug_failure_msg( - devconfig_type_to_str(TYPE_CONCAT), - get_error_string(error)); - - } else { - - print_insufficient_resources_msg( - devconfig_type_to_str(TYPE_CONCAT)); - error = -1; - } - - return (error); -} - -static int -compose_concat_within_hba( - devconfig_t *request, - dlist_t *hbas, - uint64_t nbytes, - devconfig_t **concat) -{ - int error = 0; - - dlist_t *iter = NULL; - - for (iter = hbas; - (iter != NULL) && (*concat == NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - dlist_t *disks = NULL; - uint64_t space = 0; - char *name; - - /* check for sufficient space on the HBA */ - ((error = get_display_name(hba, &name)) != 0) || - (error = hba_get_avail_disks_and_space(request, - hba, &disks, &space)); - - if (error == 0) { - if (space >= nbytes) { - error = populate_concat(request, nbytes, disks, - NULL, concat); - } else { - print_hba_insufficient_space_msg(name, space); - } - } - - dlist_free_items(disks, NULL); - } - - return (error); -} - -/* - * FUNCTION: populate_concat(devconfig_t *request, uint64_t nbytes, - * dlist_t *disks, dlist_t *othervols, - * devconfig_t **concat) - * - * INPUT: request - pointer to a request devconfig_t - * nbytes - desired concat size - * disks - pointer to a list of availalb disks - * othervols - pointer to a list of other volumes whose - * composition may affect this concat - * (e.g., submirrors of the same mirror) - * - * OUTPUT: concat - pointer to a devconfig_t to hold resulting concat - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper to populate a concat with the specified aggregate - * capacity using slices on disks in the input list. - * - * If the othervols list is not empty, the slice components - * chosen for the concat must not on the same disks as any - * of the other volumes. - * - * If sufficient slice components can be found, the concat - * is assembled and returned. - */ -int -populate_concat( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - dlist_t *othervols, - devconfig_t **concat) -{ - dlist_t *other_hbas = NULL; - dlist_t *other_disks = NULL; - - dlist_t *slices = NULL; - dlist_t *comps = NULL; - - uint16_t npaths = 0; - uint64_t capacity = 0; - int error = 0; - - *concat = NULL; - - ((error = disks_get_avail_slices(request, disks, &slices)) != 0) || - (error = get_volume_npaths(request, &npaths)); - if (error != 0) { - dlist_free_items(slices, NULL); - return (error); - } - - print_populate_volume_msg(devconfig_type_to_str(TYPE_CONCAT), nbytes); - - if (slices == NULL) { - print_populate_no_slices_msg(); - return (0); - } - - /* determine HBAs and disks used by othervols */ - error = get_hbas_and_disks_used_by_volumes(othervols, - &other_hbas, &other_disks); - if (error != 0) { - dlist_free_items(other_hbas, NULL); - dlist_free_items(other_disks, NULL); - return (error); - } - - print_populate_choose_slices_msg(); - - while (capacity < nbytes) { - - devconfig_t *comp = NULL; - dlist_t *item = NULL; - dlist_t *rmvd = NULL; - char *cname = NULL; - uint64_t csize = 0; - - /* BEGIN CSTYLED */ - /* - * 1st B_TRUE: require a different disk than those used by - * comps and othervols - * 1st B_FALSE: slice with size less that requested is acceptable - * 2nd B_FALSE: do not add an extra cylinder when resizing slice, - * this is only necessary for Stripe components whose sizes - * get rounded down to an interlace multiple and then down - * to a cylinder boundary. - * - */ - /* END CSTYLED */ - error = choose_slice((nbytes-capacity), npaths, slices, comps, - other_hbas, other_disks, B_TRUE, B_FALSE, B_FALSE, &comp); - - if ((error == 0) && (comp != NULL)) { - - item = dlist_new_item(comp); - if (item == NULL) { - error = ENOMEM; - } else { - - /* add selected component to comp list */ - comps = dlist_append(item, comps, AT_HEAD); - - /* remove it from the available list */ - slices = dlist_remove_equivalent_item(slices, (void *) comp, - compare_devconfig_and_descriptor_names, &rmvd); - - if (rmvd != NULL) { - free(rmvd); - } - - /* add the component slice to the used list */ - if ((error = devconfig_get_name(comp, &cname)) == 0) { - error = add_used_slice_by_name(cname); - } - - /* increment concat's capacity */ - if ((error == 0) && - (error = devconfig_get_size(comp, &csize)) == 0) { - capacity += csize; - } - } - - } else { - /* no possible slice */ - break; - } - } - - dlist_free_items(slices, NULL); - dlist_free_items(other_hbas, NULL); - dlist_free_items(other_disks, NULL); - - if (capacity >= nbytes) { - - error = assemble_concat(request, comps, concat); - - if (error == 0) { - print_populate_success_msg(); - } else { - /* undo any slicing done for the concat */ - dlist_free_items(comps, free_devconfig_object); - } - - } else if (error == 0) { - - if (capacity > 0) { - dlist_free_items(comps, free_devconfig_object); - print_insufficient_capacity_msg(capacity); - } else { - print_populate_no_slices_msg(); - } - - } - - return (error); -} - -/* - * FUNCTION: populate_explicit_concat(devconfig_t *request, - * dlist_t **results) - * - * INPUT: request - pointer to a request devconfig_t - * - * OUTPUT: results - pointer to a list of volume devconfig_t results - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Processes the input concat request that specifies explicit - * slice components. - * - * The components have already been validated and reserved, - * all that is required is to create devconfig_t structs - * for each requested slice. - * - * The net size of the concat is determined by the slice - * components. - * - * The concat devconfig_t is assembled and appended to the - * results list. - * - * This function is also called from - * layout_mirror.populate_explicit_mirror() - */ -int -populate_explicit_concat( - devconfig_t *request, - dlist_t **results) -{ - int error = 0; - - dlist_t *comps = NULL; - dlist_t *iter = NULL; - dlist_t *item = NULL; - - devconfig_t *concat = NULL; - - print_layout_explicit_msg(devconfig_type_to_str(TYPE_CONCAT)); - - /* assemble components */ - iter = devconfig_get_components(request); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - - devconfig_t *rqst = (devconfig_t *)iter->obj; - dm_descriptor_t rqst_slice = NULL; - char *rqst_name = NULL; - devconfig_t *comp = NULL; - - /* slice components have been validated */ - /* turn each into a devconfig_t */ - ((error = devconfig_get_name(rqst, &rqst_name)) != 0) || - (error = slice_get_by_name(rqst_name, &rqst_slice)) || - (error = create_devconfig_for_slice(rqst_slice, &comp)); - - if (error == 0) { - - print_layout_explicit_added_msg(rqst_name); - - item = dlist_new_item((void *)comp); - if (item == NULL) { - error = ENOMEM; - } else { - comps = dlist_append(item, comps, AT_TAIL); - } - } - } - - if (error == 0) { - error = assemble_concat(request, comps, &concat); - } - - if (error == 0) { - if ((item = dlist_new_item(concat)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - print_populate_success_msg(); - } - } else { - dlist_free_items(comps, free_devconfig); - } - - return (error); -} - -/* - * FUNCTION: assemble_concat(devconfig_t *request, dlist_t *comps, - * devconfig_t **concat) - * - * INPUT: request - pointer to a devconfig_t of the current request - * comps - pointer to a list of slice components - * - * OUPUT: concat - pointer to a devconfig_t to hold final concat - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which creates and populates a concat devconfig_t - * struct using information from the input request and the - * list of slice components. - * - * Determines the name of the concat either from the request - * or from the default naming scheme. - * - * Attaches the input list of components to the devconfig. - */ -static int -assemble_concat( - devconfig_t *request, - dlist_t *comps, - devconfig_t **concat) -{ - char *name = NULL; - int error = 0; - - if ((error = new_devconfig(concat, TYPE_CONCAT)) == 0) { - /* set concat name, use requested name if specified */ - if ((error = devconfig_get_name(request, &name)) != 0) { - if (error != ERR_ATTR_UNSET) { - volume_set_error(gettext("error getting requested name\n")); - } else { - error = 0; - } - } - - if (error == 0) { - if (name == NULL) { - if ((error = get_next_volume_name(&name, - TYPE_CONCAT)) == 0) { - error = devconfig_set_name(*concat, name); - free(name); - } - } else { - error = devconfig_set_name(*concat, name); - } - } - } - - if (error == 0) { - - /* compute and save true size of concat */ - if (error == 0) { - uint64_t nblks = 0; - dlist_t *iter; - - for (iter = comps; - (error == 0) && (iter != NULL); - iter = iter->next) { - - devconfig_t *comp = (devconfig_t *)iter->obj; - uint64_t comp_nblks = 0; - - if ((error = devconfig_get_size_in_blocks(comp, - &comp_nblks)) == 0) { - nblks += comp_nblks; - } - } - - if (error == 0) { - error = devconfig_set_size_in_blocks(*concat, nblks); - } - } - } - - if (error == 0) { - devconfig_set_components(*concat, comps); - } else { - free_devconfig(*concat); - *concat = NULL; - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_concat.h b/usr/src/cmd/lvm/metassist/layout/layout_concat.h deleted file mode 100644 index fb2d072d8006..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_concat.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_CONCAT_H -#define _LAYOUT_CONCAT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" -#include "volume_dlist.h" - -extern int layout_concat( - devconfig_t *request, - uint64_t nbytes, - dlist_t **results); - -extern int populate_concat( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - dlist_t *othervols, - devconfig_t **concat); - -extern int populate_explicit_concat( - devconfig_t *request, - dlist_t **results); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_CONCAT_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_device_cache.c b/usr/src/cmd/lvm/metassist/layout/layout_device_cache.c deleted file mode 100644 index 80a2865214ae..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_device_cache.c +++ /dev/null @@ -1,893 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#include -#include -#include -#include -#include - -#include "volume_dlist.h" -#include "volume_error.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_dlist_util.h" -#include "layout_request.h" - -/* - * Implementation note: - * The current caches are implemented as linked lists of data - * structures described below. Cached object lookup uses hsearch() - * where possible to minimize the inefficiency of linear search. - */ - -/* - * The name and attribute maps use hesarch() for faster lookup - */ -static const uint32_t MAX_CACHED_OBJECTS = 50000; - -/* - * The attribute cache is maintained as a list of these - * structs which map a device name to attributes. The - * device name is the unique device name returned from - * the device library, typically a devfs path. It should - * not be confused with the "display" name of the device - * which is typically a CTD or DID name. - */ -typedef struct { - char *name; - nvlist_t *attrs; -} attr_cache_t; - -static dlist_t *_attr_cache = NULL; - -/* - * The name cache is maintained via a list of these structs - * which map a descriptor to its name. - * The descriptor is saved as a string for hsearch() - */ -typedef struct { - char *desc; - char *name; -} name_cache_t; -static dlist_t *_name_cache = NULL; - -/* - * The desc cache is maintained as a list of these - * structs which map a device display name (CTD or DID) - * or alias to a descriptor. - */ -typedef struct { - char *name; - dm_descriptor_t desc; -} desc_cache_t; - -static dlist_t *_desc_cache = NULL; - -/* - * Since each of the lookup caches shares the same hsearch() - * hash table, the names used as lookup keys for the desc_cache_t - * and attr_cache_t may cause collisions. - * - * The desc_cache_t map alters the device name by prepending - * this string to avoid collisions. - */ -static const char *DESC_CACHE_KEY_PREFIX = "desc_cache"; - -/* - * The set of descriptors to be returned to libdiskmgt is - * maintained via a list of dm_descriptor_t handles. - * descriptors are added by new_descriptor() and - * cache_descriptor_to_free(). - */ -typedef struct { - dm_descriptor_t desc; - boolean_t virtual; -} desc_free_t; -static dlist_t *_desc_to_free = NULL; - -static char *find_cached_name(dm_descriptor_t desc); -static nvlist_t *find_cached_attrs(char *name); - -static int add_descriptor_to_free(dm_descriptor_t desc); - -static void release_name_cache(); -static void release_desc_to_free_cache(); -static void release_attribute_cache(); -static void release_descriptor_cache(); - -static uint32_t interal_name_count = 0; - -/* - * FUNCTION: create_device_caches() - * - * PURPOSE: Helper which initializes the module's private data - * structures. - */ -int -create_device_caches() -{ - if (hcreate(MAX_CACHED_OBJECTS) == 0) { - return (ENOMEM); - } - - return (0); -} - -/* - * FUNCTION: release_device_caches() - * - * PURPOSE: Helper which cleans up memory allocated to the module's - * private data structures. - */ -int -release_device_caches() -{ - release_name_cache(); - release_desc_to_free_cache(); - release_attribute_cache(); - release_descriptor_cache(); - - return (0); -} - -/* - * FUNCTION: free_desc_cache_object(void *obj) - * - * INPUT: obj - opaque pointer - * - * PURPOSE: Frees memory associated with an entry in the - * desc cache. - * - * Assumes that the input object is a pointer - * to a desc_cache_t struct. - */ -static void -free_desc_cache_object( - void *obj) -{ - if (obj == NULL) { - return; - } - - free(((desc_cache_t *)obj)->name); - free(obj); -} -/* - * FUNCTION: release_descriptor_cache() - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Frees all entries in the name cache. - */ -static void -release_descriptor_cache() -{ - oprintf(OUTPUT_DEBUG, - gettext(" destroying descriptor cache (%d items)\n"), - dlist_length(_desc_cache)); - - dlist_free_items(_desc_cache, free_desc_cache_object); - _desc_cache = NULL; -} - -/* - * FUNCTION: add_cached_descriptor(char *name, dm_descriptor_t desc) - * - * INPUT: name - a device name - * desc - a dm_descriptor_t handle - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Adds an entry to the descriptor cache using the input - * descriptor and name. - * - * Note that all of the lookup caches shares the same hsearch() - * hash table and that the names used as lookup keys for the - * desc_cache_t and attr_cache_t cause collisions. - * - * The desc_cache_t map alters the device name to avoid collisions. - */ -int -add_cached_descriptor( - char *name, - dm_descriptor_t desc) -{ - desc_cache_t *dcp; - char buf[MAXNAMELEN+1]; - dlist_t *item; - ENTRY entry; - - if ((dcp = (desc_cache_t *) - calloc(1, sizeof (desc_cache_t))) == NULL) { - return (ENOMEM); - } - - dcp->desc = desc; - - (void) snprintf(buf, MAXNAMELEN, "%s-%s", DESC_CACHE_KEY_PREFIX, name); - dcp->name = strdup(buf); - if (dcp->name == NULL) { - free(dcp); - return (ENOMEM); - } - - /* - * insert into the hashtable... ignore the return from hsearch(), - * there is no existing entry corresponding to desc since the - * map was already searched just before this function is called, - * see get_name() below - */ - entry.key = dcp->name; - entry.data = (void *)dcp; - (void) hsearch(entry, ENTER); - - /* insert into the list cache... */ - if ((item = dlist_new_item((void *)dcp)) == NULL) { - free(dcp); - return (ENOMEM); - } - - _desc_cache = dlist_append(item, _desc_cache, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: dm_descriptor_t find_cached_descriptor(char *name) - * - * INPUT: char * - pointer to a name or alias. - * - * RETURNS: dm_descriptor_t - dm_descriptor_t handle cached under the - * input name if a match is found. A null descriptor - * is returned if no match is found. - * - * PURPOSE: Searches for the desc that has been cached for - * the input device name. - * - * Note that all of the lookup caches shares the same hsearch() - * hash table and that the names used as lookup keys for the - * desc_cache_t and attr_cache_t cause collisions. - * - * The desc_cache_t map alters the device name to avoid collisions. - */ -dm_descriptor_t -find_cached_descriptor( - char *name) -{ - ENTRY item; - ENTRY *cached_item = NULL; - char buf[MAXNAMELEN+1]; - dm_descriptor_t desc = (dm_descriptor_t)0; - - (void) snprintf(buf, MAXNAMELEN, "%s-%s", DESC_CACHE_KEY_PREFIX, name); - item.key = buf; - - /* get descriptor associated with this name */ - if ((cached_item = hsearch(item, FIND)) != NULL) { - /* LINTED */ - desc = ((desc_cache_t *)cached_item->data)->desc; - } - - return (desc); -} - -/* - * FUNCTION: free_name_cache_object(void *obj) - * - * INPUT: obj - opaque pointer - * - * PURPOSE: Frees memory associated with an entry in the - * name cache. - * - * Assumes that the input object is a pointer - * to a name_cache_t struct. - */ -static void -free_name_cache_object( - void *obj) -{ - if (obj == NULL) { - return; - } - - free(((name_cache_t *)obj)->desc); - free(((name_cache_t *)obj)->name); - free(obj); -} - -/* - * FUNCTION: release_name_cache() - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Frees all entries in the name cache. - */ -static void -release_name_cache() -{ - oprintf(OUTPUT_DEBUG, - gettext(" destroying name cache (%d items)\n"), - dlist_length(_name_cache)); - - dlist_free_items(_name_cache, free_name_cache_object); - _name_cache = NULL; -} - -/* - * FUNCTION: add_cached_name(dm_descriptor_t desc, char *name) - * - * INPUT: desc - a dm_descriptor_t handle - * name - a device name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Adds an entry to the name cache using the input - * descriptor and name. - */ -int -add_cached_name( - dm_descriptor_t desc, - char *name) -{ - name_cache_t *ncp; - char buf[MAXNAMELEN+1]; - dlist_t *item; - ENTRY entry; - - if ((ncp = (name_cache_t *) - calloc(1, sizeof (name_cache_t))) == NULL) { - return (ENOMEM); - } - - (void) snprintf(buf, MAXNAMELEN, "%llu", desc); - ncp->desc = strdup(buf); - if (ncp->desc == NULL) { - free(ncp); - return (ENOMEM); - } - - ncp->name = strdup(name); - if (ncp->name == NULL) { - free(ncp->desc); - free(ncp); - return (ENOMEM); - } - - /* - * insert into the hashtable... ignore the return from hsearch(), - * there is no existing entry corresponding to desc since the - * map was already searched just before this function is called, - * see get_name() below - */ - entry.key = ncp->desc; - entry.data = (void *)ncp; - (void) hsearch(entry, ENTER); - - /* insert into the list cache... */ - if ((item = dlist_new_item((void *)ncp)) == NULL) { - free(ncp->desc); - free(ncp->name); - free(ncp); - return (ENOMEM); - } - - _name_cache = dlist_append(item, _name_cache, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: char *find_cached_name(dm_descriptor_t desc) - * - * INPUT: desc - a dm_descriptor_t handle - * - * RETURNS: char * - pointer to the name cached for the descriptor. - * Null otherwise. - * - * PURPOSE: Searches for the name that has been cached for - * the input dm_descriptor_t. - * - * Search linked list. - */ -static char * -find_cached_name( - dm_descriptor_t desc) -{ - char buf[MAXNAMELEN+1]; - ENTRY item; - ENTRY *cached_item = NULL; - char *name = NULL; - - (void) snprintf(buf, MAXNAMELEN, "%llu", desc); - item.key = buf; - - /* get name associated with this descriptor */ - if ((cached_item = hsearch(item, FIND)) != NULL) { - /* LINTED */ - name = ((name_cache_t *)cached_item->data)->name; - } - - return (name); -} - -/* - * FUNCTION: get_name(dm_descriptor_t desc, - * char_t **name) - * - * INPUT: desc - a dm_descriptor_t handle - * - * OUTPUT: name - pointer to char * to hold the name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Searches for the name that has been cached for the - * input dm_descriptor_t. - * - * Names are cached using the dm_descriptor. - * If no name has yet been cached, it is retrieved from - * libdiskmgt and added to the cache. - * - * Names are cached so that all name strings obtained from - * libdiskmgt will get properly released when layout completes. - */ -int -get_name( - dm_descriptor_t desc, - char **name) -{ - - int dm_free = 1; - int error = 0; - - if ((desc != (dm_descriptor_t)0) && - (*name = find_cached_name(desc)) == NULL) { - - /* not in descriptor->name cache/map, add it */ - - if (is_virtual_slice(desc) != B_TRUE) { - - dm_desc_type_t type; - - *name = dm_get_name(desc, &error); - if (error != 0) { - volume_set_error( - gettext("failed to get name for descriptor: %d\n"), - error); - return (-1); - } - - /* - * some devices can be unnamed... - * assign a unique internal name if necessary - */ - if (*name == NULL) { - char buf[MAXNAMELEN]; - - dm_free = 0; - (void) snprintf(buf, MAXNAMELEN-1, "temp-name-%lu", - interal_name_count++); - *name = strdup(buf); - if (*name == NULL) { - volume_set_error( - gettext("failed to get name for descriptor: %d\n"), - errno); - return (-1); - } - oprintf(OUTPUT_DEBUG, - gettext("unnamed descriptor %llu assigned %s\n"), - desc, *name); - } - - /* - * media can have the same name as the associated drive - * which hoses the attribute caching scheme, so unique-ify - */ - if ((type = dm_get_type(desc)) == DM_MEDIA) { - char buf[MAXNAMELEN]; - (void) snprintf(buf, MAXNAMELEN-1, "%s-%d", *name, type); - error = add_cached_name(desc, buf); - } else { - error = add_cached_name(desc, *name); - } - if (dm_free) - dm_free_name(*name); - else - free(*name); - - if (error == 0) { - /* return copied name */ - *name = find_cached_name(desc); - } else { - *name = NULL; - } - } - } - - return (error); -} - -/* - * FUNCTION: free_attr_cache_object(void *obj) - * - * INPUT: obj - opaque pointer - * - * PURPOSE: Frees memory associated with an entry in the - * attribute cache. - * - * Assumes that the input object is a pointer - * to a attr_cache_t struct. - */ -static void -free_attr_cache_object( - void *obj) -{ - if (obj == NULL) { - return; - } - - nvlist_free(((attr_cache_t *)obj)->attrs); - free(obj); -} - -/* - * FUNCTION: release_attribute_cache() - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Frees all entries in the attribute cache. - */ -void -release_attribute_cache() -{ - oprintf(OUTPUT_DEBUG, - gettext(" destroying attribute cache (%d items)\n"), - dlist_length(_attr_cache)); - - dlist_free_items(_attr_cache, free_attr_cache_object); - _attr_cache = NULL; - - /* cleanup attribute cache lookup hashtable */ - hdestroy(); -} - -/* - * FUNCTION: add_cached_attributes(char *name, nvlist_t *attrs) - * - * INPUT: name - a device name - * attrs - pointer to an nvlist_t attribute structure - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Adds an entry to the attribute cache using the input - * name and attributes. - * - * Uses a linked list to cache attributes. - * Keeps a parallel hash table for faster lookup. - */ -int -add_cached_attributes( - char *name, - nvlist_t *attrs) -{ - attr_cache_t *acp = NULL; - dlist_t *item = NULL; - ENTRY *exist = NULL; - ENTRY entry; - - /* insert into the hashtable... */ - entry.key = name; - entry.data = (void *)attrs; - - if ((exist = hsearch(entry, ENTER)) != NULL) { - /* replace the existing attrs entry */ - exist->data = (void *)attrs; - } - - if ((acp = (attr_cache_t *)calloc(1, sizeof (attr_cache_t))) == NULL) { - return (ENOMEM); - } - - acp->name = name; - acp->attrs = attrs; - - /* and cache of attr structs to be freed */ - if ((item = dlist_new_item((void *)acp)) == NULL) { - free(acp); - return (ENOMEM); - } - - _attr_cache = dlist_append(item, _attr_cache, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: nvlist_t *find_cached_attrs(char *name) - * - * INPUT: name - a device name - * - * RETURNS: nvlist_t * - pointer to an nvlist_t attribute structure - * cached under 'name'. Null otherwise. - * - * PURPOSE: Searches for the nvlist attributes that have been - * cached for the input name. - */ -static nvlist_t * -find_cached_attrs( - char *name) -{ - ENTRY item; - ENTRY *cached_item = NULL; - nvlist_t *attrs = NULL; - - item.key = name; - - /* get attributes cached under this name */ - if ((cached_item = hsearch(item, FIND)) != NULL) { - /* LINTED */ - attrs = (nvlist_t *)cached_item->data; - } - - return (attrs); -} - -/* - * FUNCTION: get_cached_attributes(dm_descriptor_t desc, - * nvlist_t **attrs) - * - * INPUT: desc - a dm_descriptor_t handle - * - * OUTPUT: attrs - pointer to an nvlist_t attribute structure - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Searches for the nvlist attributes that have been - * cached for the input dm_descriptor_t. - * - * Attributes are cached using the name associated with - * the descriptor. If no attributes have yet been cached - * they are retrieved from libdiskmgt and added to the - * cache. - * - * Attributes are cached so that layout may store transient - * data relevant to the layout process. - */ -int -get_cached_attributes( - dm_descriptor_t desc, - nvlist_t **attrs) -{ - int error = 0; - char *name = NULL; - - if ((desc != (dm_descriptor_t)0) && - (error = get_name(desc, &name)) == 0) { - - if ((*attrs = find_cached_attrs(name)) == NULL) { - /* get attrs and cache them */ - *attrs = dm_get_attributes(desc, &error); - if (error == 0) { - error = add_cached_attributes(name, *attrs); - } - } - } - - return (error); -} - -/* - * FUNCTION: new_descriptor(dm_descriptor_t *desc) - * - * INPUT: desc - a pointer to a dm_descriptor_t to hold - * the result. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Allocates a new dm_descriptor_t handle. - * - * This is necessary because the process may have to - * create "virtual" objects to represent devices that - * do not yet exist on the system and hence are unknown - * to libdiskmgt and diskmgtd. - * - * A unique handle is created for such objects and may - * be used by layout to access the virtual devices as - * if they were obtained from libdiskmgt. - */ -int -new_descriptor( - dm_descriptor_t *desc) -{ - desc_free_t *dfp; - dlist_t *item; - - *desc = NULL; - - if ((dfp = (desc_free_t *) - calloc(1, sizeof (desc_free_t))) == NULL) { - return (ENOMEM); - } - - dfp->desc = (uintptr_t)dfp; - dfp->virtual = B_TRUE; - - if ((item = dlist_new_item((void *)dfp)) == NULL) { - free(dfp); - return (ENOMEM); - } - - _desc_to_free = dlist_append(item, _desc_to_free, AT_HEAD); - - *desc = (uintptr_t)dfp; - - return (0); -} - -/* - * FUNCTION: add_descriptors_to_free(dm_descriptor_t *desc) - * - * INPUT: desc - an array of dm_descriptor_t handles from - * libdiskmgt - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Function which accepts an array of dm_descriptor_t handles - * that need to be returned to libdiskmgt. - * - * The array is iterated and each handle is passed to - * add_descriptor_to_free. - */ -int -add_descriptors_to_free( - dm_descriptor_t *desc_list) -{ - int i = 0; - - if (desc_list != NULL) { - for (i = 0; desc_list[i] != NULL; i++) { - (void) add_descriptor_to_free(desc_list[i]); - } - } - - return (0); -} - -/* - * FUNCTION: add_descriptor_to_free(dm_descriptor_t desc) - * - * INPUT: desc - dm_descriptor_t handle from libdiskmgt - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Remembers a dm_descriptor_t handle which needs to be - * returned to libdiskmgt. These handles represent memory - * allocated by the the diskmgtd and must be returned in - * order for that memory to be released. - * - * The handles are cached for the duration of layout - * processing so that layout is guaranteed to have - * unique handles for all objects received from - * libdiskmgt. - * - * The caching is accomplished by adding the handle to - * a list of desc_free_t structs. - */ -static int -add_descriptor_to_free( - dm_descriptor_t desc) -{ - desc_free_t *dfp = NULL; - dlist_t *item = NULL; - - if (desc == (dm_descriptor_t)0) { - return (0); - } - - if (is_virtual_slice(desc) == B_TRUE) { - /* don't return virtual slice descriptors to libdiskmgt */ - return (0); - } - - if ((dfp = calloc(1, sizeof (desc_free_t))) == NULL) { - return (ENOMEM); - } - - dfp->desc = desc; - dfp->virtual = B_FALSE; - - if ((item = dlist_new_item((void *)dfp)) == NULL) { - free(dfp); - return (ENOMEM); - } - - _desc_to_free = dlist_append(item, _desc_to_free, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: release_desc_to_free_cache() - * - * PURPOSE: Frees all entries in the desc_to_free cache. - * - * Iterates the _desc_to_free list and builds an - * array with all dm_descriptor_t handles that were - * obtained from libdiskmgt. Passing this array to - * dm_free_descriptors() is faster than calling - * dm_free_descriptor() to free individual handles. - */ -void -release_desc_to_free_cache() -{ - dlist_t *iter; - dm_descriptor_t *array; - int i = 0; - - oprintf(OUTPUT_DEBUG, - gettext(" destroying desc_to_free cache (%d items)\n"), - dlist_length(_desc_to_free)); - - array = (dm_descriptor_t *)calloc( - dlist_length(_desc_to_free) + 1, sizeof (dm_descriptor_t)); - - if (array != NULL) { - for (iter = _desc_to_free; iter != NULL; iter = iter->next) { - desc_free_t *dfp = (desc_free_t *)iter->obj; - if (dfp->virtual == B_FALSE) { - array[i++] = dfp->desc; - } - } - array[i] = (dm_descriptor_t)0; - dm_free_descriptors(array); - } - - /* - * If the calloc failed, the descriptors aren't explicitly freed, - * but the libdiskmgt daemon will eventually reclaim them after - * a period of inactivity. - */ - dlist_free_items(_desc_to_free, free); - - _desc_to_free = NULL; -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_device_cache.h b/usr/src/cmd/lvm/metassist/layout/layout_device_cache.h deleted file mode 100644 index 6328e6b7ec86..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_device_cache.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_DEVICE_CACHE_H -#define _LAYOUT_DEVICE_CACHE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This module manages cached copies of a dm_descriptor_t's nvpair - * list of attributes and its device name. The caches are used to - * make sure that the memory allocated to these objects is correctly - * released after the layout process has finished. The cached attrs - * also allow the layout code to store and retrieve transient, - * layout-private data in the same data structure as the other - * relevant device information. - * - * There are two primary caches of information: - * - * descriptor->name - which maps a dm_descriptor_t handle to - * the associated device's name - * - * name->attributes - which maps a device name to an nvlist_t - * attribute collection. - * - * These two data structures thus allow the following lookup chain: - * descriptor->name->attributes. - * - * The attributes are accessed by device name because the it is the - * unique identifier for the device. The descriptor returned by - * libdiskmgt is just an arbitrary handle, multiple calls into the - * library may return different descriptors for the same device. - * - * Descriptors are also get re-cycled by the library which could - * result in the same descriptor being used to represent different - * devices (although not concurrently). To prevent such recycling - * all of the descriptors are held until the layout process has - * completed. - * - * Performance testing indicated that searching the lists of known - * devices by display (CTD or DID) name or alias was a significant - * bottleneck. A mapping from display name to descriptor was added - * to address this. - * - * The module should be initialized once by calling create_device_caches() - * prior to any call which accesses data maintained by the cache. - * - * The caches should be flushed after all accesses have completed by - * calling release_device_caches. - */ - -#include "libdiskmgt.h" -#include "layout_device_util.h" - -extern int create_device_caches(); -extern int release_device_caches(); - -extern int add_cached_descriptor(char *name, dm_descriptor_t desc); -extern dm_descriptor_t find_cached_descriptor(char *name); - -extern int add_cached_name(dm_descriptor_t desc, char *name); -extern int get_name(dm_descriptor_t desc, char **name); - -extern int add_cached_attributes(char *name, nvlist_t *attrs); -extern int get_cached_attributes(dm_descriptor_t desc, nvlist_t **list); - -extern int new_descriptor(dm_descriptor_t *desc); -extern int add_descriptors_to_free(dm_descriptor_t *desc_list); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_DEVICE_CACHE_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_device_util.c b/usr/src/cmd/lvm/metassist/layout/layout_device_util.c deleted file mode 100644 index 687cd44a5cb0..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_device_util.c +++ /dev/null @@ -1,3459 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "meta_repartition.h" - -#define _LAYOUT_DEVICE_UTIL_C - -#include "volume_dlist.h" -#include "volume_error.h" -#include "volume_output.h" -#include "volume_nvpair.h" - -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_slice.h" - -/* - * Macros to produce a quoted string containing the value of a - * preprocessor macro. For example, if SIZE is defined to be 256, - * VAL2STR(SIZE) is "256". This is used to construct format - * strings for scanf-family functions below. - */ -#define QUOTE(x) #x -#define VAL2STR(x) QUOTE(x) - -/* private utilities for disks */ -static int disk_get_uint64_attribute( - dm_descriptor_t disk, - char *attr, - uint64_t *val); - -static int disk_get_boolean_attribute( - dm_descriptor_t disk, - char *attr, - boolean_t *bool); - -static int disk_get_rpm( - dm_descriptor_t disk, - uint32_t *val); - -static int disk_get_sync_speed( - dm_descriptor_t disk, - uint32_t *val); - -static int disk_has_virtual_slices( - dm_descriptor_t disk, - boolean_t *bool); - -static int disk_get_virtual_slices( - dm_descriptor_t disk, - dlist_t **list); - -static int disk_get_reserved_indexes( - dm_descriptor_t disk, - uint16_t **array); - -static int disk_get_associated_desc( - dm_descriptor_t disk, - dm_desc_type_t assoc_type, - char *assoc_type_str, - dlist_t **list); - -/* utilities for slices */ -static int slice_get_uint64_attribute( - dm_descriptor_t slice, - char *attr, - uint64_t *val); - -static int slice_set_attribute( - dm_descriptor_t slice, - char *attr, - uint64_t val); - -/* - * Virtual slices are created to represent slices that will be - * on the system after disks have been added to the destination - * diskset. For the purposes of layout, these slices must - * look & function just as real slices that are currently on - * the system. - */ -static dlist_t *_virtual_slices = NULL; - -/* temporary implementation */ -static int virtual_repartition_drive( - dm_descriptor_t disk, - mdvtoc_t *vtocp); - -static int disk_add_virtual_slice( - dm_descriptor_t disk, - dm_descriptor_t slice); - -static int virtual_slice_get_disk( - dm_descriptor_t slice, - dm_descriptor_t *diskp); - -/* - * attribute names for layout private information stored in - * device nvpair attribute lists. - */ -static char *ATTR_RESERVED_INDEX = "vdu_reserved_index"; -static char *ATTR_VIRTUAL_SLICES = "vdu_virtual_slices"; -static char *ATTR_DISK_FOR_SLICE = "vdu_disk_for_slice"; -static char *ATTR_DEV_CTD_NAME = "vdu_device_ctd_name"; -static char *ATTR_HBA_N_DISKS = "vdu_hba_n_usable_disks"; - -/* - * FUNCTION: is_ctd_like_slice_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name follows an alternate slice - * naming scheme similar to CTD - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is of the form XXXsNNN - * (e.g., whizzy0s1) - */ -boolean_t -is_ctd_like_slice_name( - char *name) -{ - uint_t s = 0; - uint_t d = 0; - int l = 0; - boolean_t is = B_FALSE; - - /* The format strings below match and discard the non-numeric part. */ - if ((sscanf(name, "/dev/dsk/%*[^0-9/]%us%u%n", &d, &s, &l) == 2 || - sscanf(name, "/dev/rdsk/%*[^0-9/]%us%u%n", &d, &s, &l) == 2 || - sscanf(name, "%*[^0-9/]%us%u%n", &d, &s, &l) == 2) && - (l == strlen(name))) { - is = B_TRUE; - } - - return (is); -} - -/* - * FUNCTION: is_bsd_like_slice_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name follows an alternate slice - * BSD-like naming scheme - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is of the form XXXNNN[a-h] - * (e.g., whizzy0a) - */ -boolean_t -is_bsd_like_slice_name( - char *name) -{ - uint_t d = 0; - int l = 0; - boolean_t is = B_FALSE; - - /* The format strings below match and discard the non-numeric part. */ - if ((sscanf(name, "/dev/dsk/%*[^0-9/]%u%*[a-h]%n", &d, &l) == 1 || - sscanf(name, "/dev/rdsk/%*[^0-9/]%u%*[a-h]%n", &d, &l) == 1 || - sscanf(name, "%*[^0-9/]%u%*[a-h]%n", &d, &l) == 1) && - (l == strlen(name))) { - is = B_TRUE; - } - - return (is); -} - -/* - * FUNCTION: is_did_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name is from the DID namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is from the DID namespace. - */ -boolean_t -is_did_name( - char *name) -{ - return (is_did_slice_name(name) || is_did_disk_name(name)); -} - -/* - * FUNCTION: is_did_slice_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a slice from the DID - * namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is a slice from the DID namespace. - */ -boolean_t -is_did_slice_name( - char *name) -{ - uint_t d = 0, s = 0; - int l = 0; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/did/rdsk/d%us%u%n", &d, &s, &l) == 2 || - sscanf(name, "/dev/did/dsk/d%us%u%n", &d, &s, &l) == 2 || - sscanf(name, "d%us%u%n", &d, &s, &l) == 2) || - (l == strlen(name))) { - is = B_TRUE; - } - - return (is); -} - -/* - * FUNCTION: is_did_disk_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a disk from the DID - * namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is a disk from the DID namespace. - */ -boolean_t -is_did_disk_name( - char *name) -{ - uint_t d = 0; - int l = 0; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/did/rdsk/d%u%n", &d, &l) == 1 || - sscanf(name, "/dev/did/dsk/d%u%n", &d, &l) == 1 || - sscanf(name, "d%u%n", &d, &l) == 1) && - (l == strlen(name))) { - is = B_TRUE; - } - - return (is); -} - -/* - * FUNCTION: is_ctd_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name is from the CTD namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is from the CTD namespace. - * - * {/dev/dsk/, /dev/rdsk/}cXtXdXsX - * {/dev/dsk/, /dev/rdsk/}cXtXdX - * {/dev/dsk/, /dev/rdsk/}cXdXsX - * {/dev/dsk/, /dev/rdsk/}cXdX - */ -boolean_t -is_ctd_name( - char *name) -{ - return (is_ctd_slice_name(name) || is_ctd_disk_name(name) || - is_ctd_target_name(name) || is_ctd_ctrl_name(name)); -} - -/* - * FUNCTION: is_ctd_slice_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a slice from the CTD - * namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is a slice name from the - * CTD namespace. - * - * {/dev/dsk/, /dev/rdsk/}cXtdXsX - * {/dev/dsk/, /dev/rdsk/}cXtXdXsX - * {/dev/dsk/, /dev/rdsk/}cXdXsX - */ -boolean_t -is_ctd_slice_name( - char *name) -{ - uint_t c = 0, t = 0, d = 0, s = 0; - char buf[MAXNAMELEN+1]; - int l = 0; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/dsk/c%ut%ud%us%u%n", &c, &t, &d, &s, &l) == 4 || - sscanf(name, "/dev/rdsk/c%ut%ud%us%u%n", &c, &t, &d, &s, &l) == 4 || - sscanf(name, "c%ut%ud%us%u%n", &c, &t, &d, &s, &l) == 4 || - sscanf(name, "/dev/dsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 || - sscanf(name, "/dev/rdsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 || - sscanf(name, "c%ud%us%u%n", &c, &d, &s, &l) == 3 || - sscanf(name, "c%ud%us%u%n", &c, &d, &s, &l) == 2) && - (l == strlen(name))) { - is = B_TRUE; - } else if ( - (sscanf(name, "/dev/dsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "/dev/rdsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2) && (l == strlen(name))) { - char *dev_pos; - - /* see if buf ends with "dXsX" */ - if (((dev_pos = strrchr(buf, 'd')) != NULL) && - (sscanf(dev_pos, "d%us%u%n", &d, &s, &l) == 2) && - (l == strlen(dev_pos))) { - - char wwn[MAXNAMELEN+2]; - - /* buf ends with "dXsX", truncate at the 'd' */ - *dev_pos = '\0'; - - /* prepend "0X" to remainder and try to scan as a hex WWN */ - (void) snprintf(wwn, sizeof (wwn), "%s%s", "0X", buf); - if ((sscanf(wwn, "%x%n", &t, &l) == 1) && (l == strlen(wwn))) { - is = B_TRUE; - } - } - } - - return (is); -} - -/* - * FUNCTION: is_ctd_disk_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a disk from the CTD - * namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is a disk name from the - * CTD namespace. - * - * {/dev/dsk/, /dev/rdsk/}cXtdX - * {/dev/dsk/, /dev/rdsk/}cXtXdX - * {/dev/dsk/, /dev/rdsk/}cXdX - */ -boolean_t -is_ctd_disk_name( - char *name) -{ - uint_t c = 0, t = 0, d = 0; - int l = 0; - char buf[MAXNAMELEN+1]; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/dsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 || - sscanf(name, "/dev/rdsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 || - sscanf(name, "c%ut%ud%u%n", &c, &t, &d, &l) == 3 || - sscanf(name, "/dev/dsk/c%ud%u%n", &c, &d, &l) == 2 || - sscanf(name, "/dev/rdsk/c%ud%n%n", &c, &d, &l) == 2 || - sscanf(name, "c%ud%u%n", &c, &d, &l) == 2) && - (l == strlen(name))) { - is = B_TRUE; - } else if ((sscanf(name, "/dev/dsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "/dev/rdsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2) && (l == strlen(name))) { - char *dev_pos; - - /* see if buf ends with "dX" */ - if (((dev_pos = strrchr(buf, 'd')) != NULL) && - (sscanf(dev_pos, "d%u%n", &d, &l) == 1) && - (l == strlen(dev_pos))) { - - char wwn[MAXNAMELEN+2]; - - /* buf ends with "dX", truncate at the 'd' */ - *dev_pos = '\0'; - - /* prepend "0X" to remainder and try to scan as a hex WWN */ - (void) snprintf(wwn, sizeof (wwn), "%s%s", "0X", buf); - if ((sscanf(wwn, "%x%n", &t, &l) == 1) && (l == strlen(wwn))) { - is = B_TRUE; - } - } - } - - return (is); -} - -/* - * FUNCTION: is_ctd_disk_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a target from the CTD - * namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is a target name from the - * CTD namespace. - * - * {/dev/dsk/, /dev/rdsk/}cXt - * {/dev/dsk/, /dev/rdsk/}cXtX - */ -boolean_t -is_ctd_target_name( - char *name) -{ - uint_t c = 0, t = 0; - int l = 0; - char buf[MAXNAMELEN+1]; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/dsk/c%ut%u%n", &c, &t, &l) == 2 || - sscanf(name, "/dev/rdsk/c%ut%u%n", &c, &t, &l) == 2 || - sscanf(name, "c%ut%u%n", &c, &t, &l) == 2) && - (l == strlen(name))) { - is = B_TRUE; - } else if ( - (sscanf(name, "/dev/dsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "/dev/rdsk/c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, buf, &l) == 2 || - sscanf(name, "c%ut%" VAL2STR(MAXNAMELEN) "s%n", - &c, &buf, &l) == 2) && (l == strlen(name))) { - - char wwn[MAXNAMELEN+2]; - - /* prepend "0X" to buf and try to scan as a hex WWN */ - (void) snprintf(wwn, sizeof (wwn), "%s%s", "0X", buf); - if ((sscanf(wwn, "%x%n", &t, &l) == 1) && (l == strlen(wwn))) { - is = B_TRUE; - } - } - - return (is); -} - -/* - * FUNCTION: is_ctd_ctrl_name(char *name) - * INPUT: name - a char * - * - * RETURNS: boolean_t - B_TRUE - if name represents a controller/hba - * from the CTD namespace - * B_FALSE - otherwise - * - * PURPOSE: Determines if the input name is an HBA name from the - * CTD namespace. - * - * {/dev/dsk/, /dev/rdsk/}cX - */ -boolean_t -is_ctd_ctrl_name( - char *name) -{ - uint_t c = 0; - int l = 0; - boolean_t is = B_FALSE; - - if ((sscanf(name, "/dev/dsk/c%u%n", &c, &l) == 1 || - sscanf(name, "/dev/rdsk/c%u%n", &c, &l) == 1 || - sscanf(name, "c%u%n", &c, &l) == 1) && - (l == strlen(name))) { - is = B_TRUE; - } - - return (is); -} - -/* - * FUNCTION: set_display_name(dm_descriptor_t desc, char *name) - * get_display_name(dm_descriptor_t desc, char **name) - * - * INPUT: desc - a dm_descriptor_t handle for a device - * name - a char * name - * - * OUTPUT: **name - a pointer to a char * to hold the display - * name associated with the input descriptor. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helpers to set/get the input descriptor's display name. - * - * Only slices, disks and HBAs should have display names. - * - * The attribute is only set in the cached copy of - * the device's nvpair attribute list. This function - * does not affect the underlying physical device. - * - * An entry is added in the name->descriptor cache - * so the descriptor can be found by name quickly. - */ -int -set_display_name( - dm_descriptor_t desc, - char *name) -{ - nvlist_t *attrs = NULL; - int error = 0; - - ((error = add_cached_descriptor(name, desc)) != 0) || - (error = get_cached_attributes(desc, &attrs)) || - (error = set_string(attrs, ATTR_DEV_CTD_NAME, name)); - - return (error); -} - -int -get_display_name( - dm_descriptor_t desc, - char **name) -{ - nvlist_t *attrs = NULL; - int error = 0; - - ((error = get_cached_attributes(desc, &attrs)) != 0) || - (error = get_string(attrs, ATTR_DEV_CTD_NAME, name)); - - return (error); -} - -/* - * FUNCTION: disk_get_slices(dm_descriptor_t disk, dlist_t **list) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *list - a pointer to list to hold the results. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Collect all of the known slices for the input disk. - * - * These slices may be actual slices which currently exist - * on the disk, or virtual slices which will exist when the - * disk is added to the destination diskset. - */ -int -disk_get_slices( - dm_descriptor_t disk, - dlist_t **list) -{ - dm_descriptor_t *media = NULL; - boolean_t virtual = B_FALSE; - int i = 0; - int error = 0; - - *list = 0; - - if ((error = disk_has_virtual_slices(disk, &virtual)) != 0) { - return (error); - } - - if (virtual == B_TRUE) { - error = disk_get_virtual_slices(disk, list); - } - - /* add real slices from disk's media... */ - media = dm_get_associated_descriptors(disk, DM_MEDIA, &error); - (void) add_descriptors_to_free(media); - - if (error == 0) { - /* if there's no media, this is a removeable drive */ - if (media != NULL && *media != NULL) { - - /* examine media's slices... */ - dm_descriptor_t *slices = NULL; - slices = dm_get_associated_descriptors(*media, - DM_SLICE, &error); - (void) add_descriptors_to_free(slices); - - if (error != 0) { - print_get_assoc_desc_error(disk, gettext("slice"), error); - } else { - for (i = 0; (slices[i] != NULL) && (error == 0); i++) { - dlist_t *item = - dlist_new_item((void *)(uintptr_t)slices[i]); - if (item == NULL) { - error = ENOMEM; - } else { - *list = dlist_append(item, *list, AT_TAIL); - } - } - free(slices); - } - free(media); - } - } else { - print_get_assoc_desc_error(disk, gettext("media"), error); - } - - return (error); -} - -int -get_virtual_slices( - dlist_t **list) -{ - *list = _virtual_slices; - - return (0); -} - -/* - * FUNCTION: virtual_repartition_drive(dm_descriptor_t disk, - * mdvtoc_t *vtocp) - * - * INPUT: disk - the disk to be virtually repartitioned - * - * OUTPUT: vtocp - a poitner to a mdvtoc struct to hold the results - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which emulates the repartitioning that is done - * when a disk is added to a diskset. - * - * Modified version of meta_partition_drive which uses info - * from libdiskmgt to accomplish the repartitioning. - * - * This exists to allow the layout module to run with a - * simulated hardware environment. - * - * XXX This method absolutely does not produce the exact - * same result as meta_repartition_drive: only information - * required by the layout code is returned. Basically, - * a slice 7 (or 6 on EFI labelled disks) is created and - * sized, the remained of the available cylinders are put - * into slice 0. - * - * XXX2 This method is required until there is resolution - * on whether metassist testing will be done using the - * hardware simulation mechanism libdiskmgt provides. - * Doing so will also require parts of libmeta to be - * simulated as well. Some research has been done into - * building an alternate libmeta.so containing - * implementations of the functions used by metassist - * that are compatible with the simulated hardware. - * Actual work is currently on hold. - */ -static int -virtual_repartition_drive( - dm_descriptor_t disk, - mdvtoc_t *vtocp) -{ - uint_t replicaslice = 7; - unsigned long long cylsize; - unsigned long long drvsize; - uint_t reservedcyl; - ushort_t resflag; - unsigned long long ressize; - diskaddr_t replica_start; - diskaddr_t replica_size; - diskaddr_t data_start; - diskaddr_t data_size; - - boolean_t efi = B_FALSE; - uint64_t ncyls = 0; - uint64_t nheads = 0; - uint64_t nsects = 0; - int error = 0; - - /* - * At this point, ressize is used as a minimum value. Later it - * will be rounded up to a cylinder boundary. ressize is in - * units of disk sectors. - */ - ressize = MD_DBSIZE + VTOC_SIZE; - resflag = V_UNMNT; - - ((error = disk_get_is_efi(disk, &efi)) != 0) || - (error = disk_get_ncylinders(disk, &ncyls)) || - (error = disk_get_nheads(disk, &nheads)) || - (error = disk_get_nsectors(disk, &nsects)); - if (error != 0) { - return (error); - } - - if (efi) { - replicaslice = 6; - } - - /* - * Both cylsize and drvsize are in units of disk sectors. - * - * The intended results are of type unsigned long long. Since - * each operand of the first multiplication is of type - * unsigned int, we risk overflow by multiplying and then - * converting the result. Therefore we explicitly cast (at - * least) one of the operands, forcing conversion BEFORE - * multiplication, and avoiding overflow. The second - * assignment is OK, since one of the operands is already of - * the desired type. - */ - cylsize = ((unsigned long long)nheads) * nsects; - drvsize = cylsize * ncyls; - - /* - * How many cylinders must we reserve for slice seven to - * ensure that it meets the previously calculated minimum - * size? - */ - reservedcyl = (ressize + cylsize - 1) / cylsize; - - /* - * It seems unlikely that someone would pass us too small a - * disk, but it's still worth checking for... - */ - if (reservedcyl >= ncyls) { - volume_set_error( - gettext("disk is too small to hold a metadb replica")); - return (-1); - } - - replica_start = 0; - replica_size = reservedcyl * cylsize; - data_start = reservedcyl * cylsize; - data_size = drvsize - (reservedcyl * cylsize); - - /* - * fill in the proposed VTOC information. - */ - - /* We need at least replicaslice partitions in the proposed vtoc */ - vtocp->nparts = replicaslice + 1; - vtocp->parts[MD_SLICE0].start = data_start; - vtocp->parts[MD_SLICE0].size = data_size; - vtocp->parts[MD_SLICE0].tag = V_USR; - vtocp->parts[replicaslice].start = replica_start; - vtocp->parts[replicaslice].size = replica_size; - vtocp->parts[replicaslice].flag = resflag; - vtocp->parts[replicaslice].tag = V_USR; - - return (0); -} - -/* - * FUNCTION: create_virtual_slices(dlist_t *disks) - * - * INPUT: possibles - a list of dm_descriptor_t disk handles for - * disks known to be available for use by layout. - * - * SIDEEFFECT: populates the private of virtual slices. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which creates virtual slices for each disk which - * could be added to a diskset if necessary... - * - * Iterate the input list of available disks and see what the - * slicing would be if the disk were added to a diskset. - * - * For the resulting slices, create virtual slice descriptors - * and attributes for these slices and add them to the list of - * available slices. - */ -int -create_virtual_slices( - dlist_t *disks) -{ - int error = 0; - dlist_t *iter; - boolean_t sim = B_FALSE; - static char *simfile = "METASSISTSIMFILE"; - - sim = ((getenv(simfile) != NULL) && (strlen(getenv(simfile)) > 0)); - - /* see what slices each of the disks will have when added to a set */ - for (iter = disks; error == 0 && iter != NULL; iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - dlist_t *slices = NULL; - mdvtoc_t vtoc; - char *dname; - int i = 0; - - if ((error = get_display_name(disk, &dname)) != 0) { - break; - } - - if (sim != B_TRUE) { - - /* sim disabled: use meta_repartition_drive() */ - - md_error_t mderror = mdnullerror; - int opts = (MD_REPART_FORCE | MD_REPART_DONT_LABEL); - mdsetname_t *sp; - mddrivename_t *dnp; - - /* disk is in the local set */ - sp = metasetname(MD_LOCAL_NAME, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - error = -1; - break; - } - - dnp = metadrivename(&sp, dname, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - error = -1; - break; - } - - if (meta_repartition_drive( - sp, dnp, opts, &vtoc, &mderror) != 0) { - volume_set_error( - gettext("failed to repartition disk %s\n"), - dname); - error = -1; - break; - } - - } else { - - /* sim enabled: use faked repartition code */ - if (virtual_repartition_drive(disk, &vtoc) != 0) { - volume_set_error( - gettext("failed simulated repartition of %s\n"), - dname); - error = -1; - break; - } - } - - /* BEGIN CSTYLED */ - /* - * get the existing slices on the disk, if the repartition - * was successful, these slices need to have their size, start - * blk and size in blks set to 0 - */ - /* END CSTYLED */ - if ((error = disk_get_slices(disk, &slices)) == 0) { - dlist_t *iter2 = slices; - for (; iter2 != NULL; iter2 = iter2->next) { - dm_descriptor_t sp = (uintptr_t)iter2->obj; - ((error = slice_set_start_block(sp, 0)) != 0) || - (error = slice_set_size_in_blocks(sp, 0)) || - (error = slice_set_size(sp, 0)); - } - dlist_free_items(slices, NULL); - } - - /* scan VTOC, find slice with the free space */ - for (i = 0; i < vtoc.nparts; i++) { - - if (vtoc.parts[i].tag == V_USR && - vtoc.parts[i].flag != V_UNMNT) { - - /* non-replica slice with free space */ - char buf[MAXPATHLEN]; - (void) snprintf(buf, MAXPATHLEN-1, "%ss%d", dname, i); - - if ((error = add_virtual_slice(buf, - (uint32_t)i, - (uint64_t)vtoc.parts[i].start, - (uint64_t)vtoc.parts[i].size, - disk)) != 0) { - break; - } - - } else if (vtoc.parts[i].tag == V_RESERVED) { - - /* skip EFI reserved slice */ - continue; - - } else if (vtoc.parts[i].tag == V_USR && - vtoc.parts[i].flag == V_UNMNT) { - - /* BEGIN CSTYLED */ - /* - * Make the replica slice 0 sized -- this will - * force the disk to be repartitioned by - * metaset when it is added to the disk set. - * - * XXX this is a temporary workaround until - * 4712873 is integrated... - */ - /* BEGIN CSTYLED */ - char buf[MAXPATHLEN]; - (void) snprintf(buf, MAXPATHLEN-1, "%ss%d", dname, i); - add_slice_to_remove(buf, i); - - /* replica slice, stop here */ - break; - } - } - } - - return (error); -} - -/* - * FUNCTION: add_virtual_slice(char *name, uint32_t index, - * uint64_t startblk, uint64_t sizeblks, - * dm_descriptor_t disk) - * - * INPUT: name - the name of the new virtual slice - * index - the VTOC index ... - * startblk - the start block ... - * sizeblks - the size in blocks ... - * disk - the parent disk ... - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which adds the appropriate data structures to - * represent a new virtual slice. - * - * allocates a new descriptor - * adds entries to name->desc and desc->name caches - * allocates an attribute nvpair list - * fills in the relevant attributes for the slice - * associates the slice with its parent disk - * adds an entry to the list of all virtual slices - * generates aliases if the associated disk has aliases. - */ -int -add_virtual_slice( - char *name, - uint32_t index, - uint64_t startblk, - uint64_t sizeblks, - dm_descriptor_t disk) -{ - dm_descriptor_t sp; - nvlist_t *attrs; - char *sname; - dlist_t *aliases = NULL; - dlist_t *item = NULL; - int error = 0; - - if ((error = nvlist_alloc(&attrs, NV_UNIQUE_NAME, 0)) != 0) { - return (error); - } - - /* create descriptor */ - ((error = new_descriptor(&sp)) != 0) || - /* cache name for the descriptor */ - (error = add_cached_name(sp, name)) || - /* cache descriptor for the name */ - (error = add_cached_descriptor(name, sp)) || - - /* fill in attributes */ - (error = set_string(attrs, ATTR_DEV_CTD_NAME, name)) || - (error = set_uint32(attrs, DM_INDEX, index)) || - (error = set_uint64(attrs, DM_START, startblk)) || - (error = set_uint64(attrs, DM_SIZE, sizeblks)) || - (error = set_uint64(attrs, ATTR_DISK_FOR_SLICE, (uint64_t)disk)) || - - /* add attributes to the cache */ - (error = get_name(sp, &sname)) || - (error = add_cached_attributes(sname, attrs)) || - - /* connect slice to disk */ - (error = disk_add_virtual_slice(disk, sp)) || - (error = get_display_name(disk, &name)) || - (error = get_aliases(disk, &aliases)); - - if (error != 0) { - return (error); - } - - /* generate slice's aliases if the disk has aliases */ - if (aliases != NULL) { - char buf[MAXNAMELEN]; - - for (; aliases != NULL; aliases = aliases->next) { - (void) snprintf(buf, MAXNAMELEN-1, "%ss%d", - (char *)aliases->obj, index); - error = set_alias(sp, buf); - } - dlist_free_items(aliases, free); - } - - if ((item = dlist_new_item((void *)(uintptr_t)sp)) == NULL) { - return (ENOMEM); - } - - _virtual_slices = dlist_append(item, _virtual_slices, AT_HEAD); - - oprintf(OUTPUT_DEBUG, - gettext(" created virtual slice %s start: %llu, size: %llu\n"), - sname, startblk, sizeblks); - - return (error); -} - -/* - * FUNCTION: release_virtual_slices() - * - * PURPOSE: Helper which cleans up the module private list of virtual - * slices. - * - * The descriptors for the virtual slices are cleaned up - * in device_cache_util.free_cached_descriptors - */ -void -release_virtual_slices() -{ - dlist_free_items(_virtual_slices, NULL); - _virtual_slices = NULL; -} - -/* - * FUNCTION: disk_add_virtual_slice(dm_descriptor_t disk, - * dm_descriptor_t slice) - * - * INPUT: disk - a dm_descriptor_t disk handle - * slice - a dm_descriptor_t virtual slice handle - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which adds a virtual slice to the input disk's - * list of virtual slices. - * - * The disk's virtual slice dm_descriptor_t handles are - * stored in the disk's nvpair attribute list. - */ -static int -disk_add_virtual_slice( - dm_descriptor_t disk, - dm_descriptor_t slice) -{ - nvlist_t *attrs = NULL; - uint64_t *old_slices = NULL; - uint64_t *new_slices = NULL; - uint_t nelem = 0; - int i = 0; - int error = 0; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint64_array( - attrs, ATTR_VIRTUAL_SLICES, &old_slices, &nelem)) != 0) { - if (error != ENOENT) { - return (error); - } - error = 0; - } - - /* make a new array */ - new_slices = (uint64_t *)calloc(nelem + 1, sizeof (uint64_t)); - if (new_slices != NULL) { - - for (i = 0; i < nelem; i++) { - new_slices[i] = old_slices[i]; - } - new_slices[i] = slice; - - error = set_uint64_array( - attrs, ATTR_VIRTUAL_SLICES, new_slices, nelem); - - free(new_slices); - - } else { - error = ENOMEM; - } - - return (error); -} - -/* - * FUNCTION: disk_has_virtual_slices(dm_descriptor_t disk, boolean_t *bool) - * - * INPUT: disk - a dm_descriptor_t disk handle - * - * OUTPUT: bool - B_TRUE - if the disk has virtual slices - * B_FALSE - otherwise - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which determines if the input disk has virtual slices. - * - * If a disk has virtual slices, their dm_descriptor_t handles - * will be stored in the disk's nvpair attribute list. - */ -static int -disk_has_virtual_slices( - dm_descriptor_t disk, - boolean_t *bool) -{ - nvlist_t *attrs = NULL; - uint64_t *slices = NULL; - uint_t nelem = 0; - int error = 0; - - *bool = B_FALSE; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint64_array( - attrs, ATTR_VIRTUAL_SLICES, &slices, &nelem)) != 0) { - if (error == ENOENT) { - error = 0; - nelem = 0; - } else { - /* count actual number of elements */ - int i = 0; - while (i < nelem) { - if (slices[i] != -1) { - ++i; - } - } - nelem = i; - } - } - - *bool = (nelem != 0); - - return (error); -} - -/* - * FUNCTION: disk_get_virtual_slices(dm_descriptor_t disk, boolean_t *bool) - * - * INPUT: disk - a dm_descriptor_t disk handle - * - * OUTPUT: list - a dlist_t list of dm_descriptor_t handles for the - * disk's virtual slices. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which retrieves a list of the input disk's virtual - * slices. - * - * If a disk has virtual slices, their dm_descriptor_t handles - * will be stored in the disk's nvpair attribute list. - */ -static int -disk_get_virtual_slices( - dm_descriptor_t disk, - dlist_t **list) -{ - nvlist_t *attrs = NULL; - uint64_t *slices = NULL; - uint_t nelem = 0; - int error = 0; - int i = 0; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint64_array( - attrs, ATTR_VIRTUAL_SLICES, &slices, &nelem)) != 0) { - if (error != ENOENT) { - return (error); - } - - return (0); - } - - for (i = 0; i < nelem && slices[i] != -1; i++) { - dlist_t *item = NULL; - - if ((item = dlist_new_item((void*)(uintptr_t)slices[i])) == NULL) { - error = ENOMEM; - break; - } - - *list = dlist_append(item, *list, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: is_virtual_slice(dm_descriptor_t desc) - * - * INPUT: desc - a dm_descriptor_t handle - * - * RETURNS: boolean_t - B_TRUE if the input descriptor is for - * a virtual slice. - * B_FALSE otherwise - * - * PURPOSE: Helper which determines whether the input descriptor - * corresponds to a virtual slice. - * - * All virtual slices are stored in a module private list. - * This list is iterated to see if it contains the input - * descriptor. - */ -boolean_t -is_virtual_slice( - dm_descriptor_t desc) -{ - return (dlist_contains(_virtual_slices, - (void*)(uintptr_t)desc, compare_descriptors)); -} - -/* - * FUNCTION: disk_get_available_slice_index(dm_descriptor_t disk, - * uint32_t *newindex) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *newindex - a pointer to a uint32_t to hold the available - * index. If no index is available, the value pointed - * to is not modified. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: examine the input disk's list of slices and find an unused - * slice index. The replica slice (index 7 or 6) is always - * off-limits -- it shows up as in use. Slice 0 should only - * be used as a last resort. - * - * If an available index is found, it is stored into newindex. - * Otherwise, newindex is unchanged. This allows the caller to - * pass in an index and check if it has been modified on return. - * - * V_NUMPAR is used as the number of available slices, - * SPARC systems have V_NUMPAR == 8, X86 have V_NUMPAR == 16. - * - * EFI disks have only 7. - */ -int -disk_get_available_slice_index( - dm_descriptor_t disk, - uint32_t *newindex) -{ - dlist_t *iter = NULL; - dlist_t *slices = NULL; - uint32_t index = 0; - uint16_t *reserved = NULL; - boolean_t *used = NULL; - boolean_t is_efi = B_FALSE; - int error = 0; - int i = 0; - int nslices = V_NUMPAR; - - if (((error = disk_get_slices(disk, &slices)) != 0) || - (error = disk_get_is_efi(disk, &is_efi)) != 0) { - return (error); - } - - if (is_efi == B_TRUE) { - /* limit possible indexes to 7 for EFI */ - nslices = 7; - } - - used = (boolean_t *)calloc(nslices, sizeof (boolean_t)); - if (used == NULL) { - oprintf(OUTPUT_DEBUG, - gettext("failed allocating slice index array\n"), - NULL); - return (ENOMEM); - } - - /* eliminate indexes that are reserved */ - if ((error = disk_get_reserved_indexes(disk, &reserved)) != 0) { - return (error); - } - - if (reserved != NULL) { - for (i = 0; i < nslices; i++) { - if (reserved[i] == 1) { - used[i] = B_TRUE; - } - } - } - - /* eliminate slices that are in use (have a size > 0) */ - /* 0 sized slices unused slices */ - for (iter = slices; iter != NULL; iter = iter->next) { - dm_descriptor_t sp = (uintptr_t)iter->obj; - uint64_t size = 0; - - ((error = slice_get_index(sp, &index)) != 0) || - (error = slice_get_size_in_blocks(sp, &size)); - if (error != 0) { - return (error); - } - - if (size > 0) { - used[(int)index] = B_TRUE; - } - } - dlist_free_items(slices, NULL); - - for (i = 0; i < nslices; i++) { - - /* skip the index passed in */ - if (i == *newindex) { - continue; - } - - if (used[i] != B_TRUE) { - index = i; - break; - } - } - - if (i != nslices) { - /* return unused slice index */ - *newindex = index; - } - - free((void *)used); - - return (0); -} - -/* - * FUNCTION: disk_get_media_type(dm_descriptor_t slice, uint32_t *type) - * - * INPUT: slice - a dm_descriptor_t handle for a disk - * - * OUTPUT: *type - a pointer to a uint32_t to hold the - * current type value for the media on which - * the input slice resides. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Retrieves the media type for the disk. - * - * Get the media associate with the input disk descriptor - * and determine its type. - */ -int -disk_get_media_type( - dm_descriptor_t disk, - uint32_t *type) -{ - int error = 0; - dm_descriptor_t *mdp = NULL; - - mdp = dm_get_associated_descriptors(disk, DM_MEDIA, &error); - (void) add_descriptors_to_free(mdp); - - if (error != 0) { - print_get_assoc_desc_error(disk, gettext("media"), error); - } else { - /* disk should have exactly 1 media */ - if ((mdp != NULL) && (*mdp != NULL)) { - nvlist_t *attrs = dm_get_attributes(*mdp, &error); - if ((error == 0) && (attrs != NULL)) { - error = get_uint32(attrs, DM_MTYPE, type); - } - - nvlist_free(attrs); - } - /* no media: removeable drive */ - } - - if (mdp != NULL) { - free(mdp); - } - - return (error); -} - -/* - * FUNCTION: disk_get_rpm(dm_descriptor_t disk, uint32_t *val) - * disk_get_sync_speed(dm_descriptor_t disk, uint32_t *val) - * disk_get_size_in_blocks(dm_descriptor_t disk, uint64_t *val) - * disk_get_blocksize(dm_descriptor_t disk, uint64_t *val) - * disk_get_ncylinders(dm_descriptor_t disk, uint64_t *val) - * disk_get_nheads(dm_descriptor_t disk, uint64_t *val) - * disk_get_nsectors(dm_descriptor_t disk, uint64_t *val) - * disk_get_is_efi(dm_descriptor_t disk, boolean_t *val) - * disk_get_is_online(dm_descriptor_t disk, boolean_t *val) - * disk_get_media_type(dm_descriptor_t disk, uint32_t *type) - * disk_get_has_fdisk(dm_descriptor_t disk, boolean_t *val) - * disk_get_start_block(dm_descriptor_t disk, uint64_t *val) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *bool - a pointer to a variable of the appropriate - * type to hold the current value for the attribute - * of interest. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrappers around disk_get_XXX_attribute that know - * which attribute needs to be retrieved and also handle - * any necesasry type or units conversions. - */ -static int -disk_get_rpm( - dm_descriptor_t disk, - uint32_t *val) -{ - uint64_t val64 = 0; - int error = 0; - - if ((error = disk_get_uint64_attribute( - disk, DM_RPM, &val64)) != 0) { - return (error); - } - - *val = (uint32_t)val64; - - return (error); -} - -int -disk_get_drive_type( - dm_descriptor_t disk, - uint32_t *val) -{ - uint64_t val64 = 0; - int error = 0; - - if ((error = disk_get_uint64_attribute( - disk, DM_DRVTYPE, &val64)) != 0) { - return (error); - } - - *val = (uint32_t)val64; - - return (error); -} - -static int -disk_get_sync_speed( - dm_descriptor_t disk, - uint32_t *val) -{ - uint64_t val64 = 0; - int error = 0; - - if ((error = disk_get_uint64_attribute( - disk, DM_SYNC_SPEED, &val64)) != 0) { - return (error); - } - - *val = (uint32_t)val64; - - return (error); -} - -/* returns number of usable blocks */ -int -disk_get_size_in_blocks( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_NACCESSIBLE, val)); -} - -/* returns first usable block on disk */ -int -disk_get_start_block( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_START, val)); -} - -int -disk_get_blocksize( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_BLOCKSIZE, val)); -} - -int -disk_get_ncylinders( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_NCYLINDERS, val)); -} - -int -disk_get_nheads( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_NHEADS, val)); -} - -int -disk_get_nsectors( - dm_descriptor_t disk, - uint64_t *val) -{ - return (disk_get_uint64_attribute(disk, DM_NSECTORS, val)); -} - -/* - * FUNCTION: disk_get_is_online(dm_descriptor_t disk, boolean_t *val) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Determine if the input disk is "online". - * - * Check the status bit of the drive, if it is 1 the drive - * is online, if it is 0 the drive is offline. - */ -int -disk_get_is_online( - dm_descriptor_t disk, - boolean_t *val) -{ - uint64_t status = 0; - int error = 0; - - *val = B_FALSE; - - error = disk_get_uint64_attribute(disk, DM_STATUS, &status); - if (error == 0) { - *val = (status == 1) ? B_TRUE : B_FALSE; - } - - return (error); -} - -/* - * FUNCTION: disk_get_is_efi(dm_descriptor_t disk, boolean_t *bool) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Determine if the input disk is labeled with an EFI label. - * - * The label type is actually a property of the media - * associated with the disk, so retrieve the media and - * check if it is EFI labeled. - */ -int -disk_get_is_efi( - dm_descriptor_t disk, - boolean_t *bool) -{ - return (disk_get_boolean_attribute(disk, DM_EFI, bool)); -} - -/* - * FUNCTION: disk_get_has_fdisk(dm_descriptor_t disk, boolean_t *bool) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Determine if the input disk has an FDISK partition. - */ -int -disk_get_has_fdisk( - dm_descriptor_t disk, - boolean_t *bool) -{ - return (disk_get_boolean_attribute(disk, DM_FDISK, bool)); -} - -/* - * FUNCTION: disk_get_has_solaris_partition(dm_descriptor_t disk, boolean_t *bool) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Determine if the input disk has a Solaris FDISK partition. - */ -int -disk_get_has_solaris_partition( - dm_descriptor_t disk, - boolean_t *bool) -{ - boolean_t has_fdisk = B_FALSE; - int error = 0; - - if ((error = disk_get_has_fdisk(disk, &has_fdisk)) != 0) { - return (error); - } - - *bool = B_FALSE; - - if (has_fdisk == B_TRUE) { - /* get disk's media */ - dm_descriptor_t *media; - media = dm_get_associated_descriptors(disk, DM_MEDIA, &error); - (void) add_descriptors_to_free(media); - if (error != 0) { - print_get_assoc_desc_error(disk, gettext("media"), error); - } else if ((media != NULL) && (*media != NULL)) { - /* get media's partitions */ - dm_descriptor_t *parts; - parts = dm_get_associated_descriptors( - media[0], DM_PARTITION, &error); - (void) add_descriptors_to_free(parts); - if (error != 0) { - print_get_assoc_desc_error(media[0], - gettext("partitions"), error); - } else { - /* search partitions for one with type Solaris */ - int i = 0; - for (; (parts != NULL) && (parts[i] != NULL) && - (error == 0) && (*bool == B_FALSE); i++) { - nvlist_t *attrs = dm_get_attributes(parts[i], &error); - uint32_t ptype = 0; - if ((error == 0) && (attrs != NULL)) { - error = get_uint32(attrs, DM_PTYPE, &ptype); - if ((error == 0) && - (ptype == SUNIXOS || ptype == SUNIXOS2)) { - *bool = B_TRUE; - } - } - nvlist_free(attrs); - } - } - - free(parts); - free(media); - } - - /* if there was no media, it was a removeable drive */ - } - - return (error); -} - -static int -disk_get_boolean_attribute( - dm_descriptor_t disk, - char *attr, - boolean_t *bool) -{ - nvlist_t *attrs = NULL; - int error = 0; - - *bool = B_FALSE; - - if ((strcmp(attr, DM_EFI) == 0) || - (strcmp(attr, DM_FDISK) == 0)) { - - /* - * these attributes are actually on the media, - * not the disk... so get the media descriptor - * for this disk - */ - dm_descriptor_t *media; - - media = dm_get_associated_descriptors(disk, DM_MEDIA, &error); - (void) add_descriptors_to_free(media); - - if (error != 0) { - print_get_assoc_desc_error(disk, gettext("media"), error); - } else if ((media != NULL) && (*media != NULL)) { - /* if there's no media, it is a removeable drive */ - error = get_cached_attributes(media[0], &attrs); - } - free(media); - - } else { - error = get_cached_attributes(disk, &attrs); - if (error != 0) { - print_get_desc_attr_error(disk, gettext("drive"), attr, error); - } - } - - if (error != 0) { - return (error); - } - - if (nvlist_lookup_boolean(attrs, attr) == 0) { - *bool = B_TRUE; - } - - return (error); -} - -static int -disk_get_uint64_attribute( - dm_descriptor_t disk, - char *attr, - uint64_t *val) -{ - nvlist_t *attrs = NULL; - uint32_t ui32 = 0; - int error = 0; - - /* - * these attributes are actually on the media, - * not the disk... so get the media descriptor - * for this disk - */ - if ((strcmp(attr, DM_SIZE) == 0) || - (strcmp(attr, DM_START) == 0) || - (strcmp(attr, DM_NACCESSIBLE) == 0) || - (strcmp(attr, DM_BLOCKSIZE) == 0) || - (strcmp(attr, DM_NCYLINDERS) == 0) || - (strcmp(attr, DM_NHEADS) == 0) || - (strcmp(attr, DM_NSECTORS) == 0)) { - - dm_descriptor_t *media; - - media = dm_get_associated_descriptors(disk, DM_MEDIA, &error); - (void) add_descriptors_to_free(media); - - if (error != 0) { - print_get_assoc_desc_error(disk, gettext("media"), error); - } else if ((media == NULL) || (*media == NULL)) { - print_get_assoc_desc_error(disk, gettext("media"), error); - error = -1; - } else { - error = get_cached_attributes(media[0], &attrs); - free(media); - } - - } else { - error = get_cached_attributes(disk, &attrs); - if (error != 0) { - print_get_desc_attr_error(disk, gettext("drive"), attr, error); - } - } - - if (error != 0) { - return (error); - } - - if (strcmp(attr, DM_SIZE) == 0 || - strcmp(attr, DM_NACCESSIBLE) == 0 || - strcmp(attr, DM_START) == 0) { - error = get_uint64(attrs, attr, val); - } else if (strcmp(attr, DM_BLOCKSIZE) == 0 || - strcmp(attr, DM_NCYLINDERS) == 0 || - strcmp(attr, DM_NHEADS) == 0 || - strcmp(attr, DM_NSECTORS) == 0 || - strcmp(attr, DM_RPM) == 0 || - strcmp(attr, DM_DRVTYPE) == 0 || - strcmp(attr, DM_SYNC_SPEED) == 0 || - strcmp(attr, DM_STATUS) == 0) { - error = get_uint32(attrs, attr, &ui32); - *val = (uint64_t)ui32; - } - - return (error); -} - -/* - * FUNCTION: group_similar_hbas(dlist_t *hbas, dlist_t **list) - * - * INPUT: hbas - a list of HBA dm_descriptor_t handles. - * - * OUTPUT: **list - a pointer to a list to hold the lists of HBAs - * grouped by characteristics. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Examine the input HBAs and collate them into separate - * lists, grouped by their type and the protocols they - * support. - * - * The returned list of list is arranged in decreasing order - * of preference, "better" HBAs come first. - * - * find all MPXIO controllers - * find all similar FC HBAs - * find all similar SCSI HBAs - * fast{wide}80 - * fast{wide}40 - * fast{wide}20 - * clock uint32 ?? - * find all similar ATA/IDE HBAs - * find all similar USB HBAs - */ -int -group_similar_hbas( - dlist_t *hbas, - dlist_t **list) -{ - /* preference order of HBAs */ - enum { - HBA_FIBRE_MPXIO = 0, - HBA_SCSI_MPXIO, - HBA_FIBRE, - HBA_SCSI_FW80, - HBA_SCSI_FW40, - HBA_SCSI_FW20, - HBA_SCSI_F80, - HBA_SCSI_F40, - HBA_SCSI_F20, - HBA_SCSI, - HBA_ATA, - HBA_USB, - HBA_LAST - }; - - dlist_t *groups = NULL; - dlist_t *iter = NULL; - dlist_t *item = NULL; - dlist_t *lists[HBA_LAST]; - - int error = 0; - int i = 0; - - (void) memset(lists, '\0', HBA_LAST * sizeof (dlist_t *)); - - for (iter = hbas; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - char *type = NULL; - - /* if item doesn't go into a list it must be freed */ - if ((item = dlist_new_item((void *)(uintptr_t)hba)) == NULL) { - error = ENOMEM; - continue; - } - - if ((error = hba_get_type(hba, &type)) != 0) { - free(item); - continue; - } - - if (strcmp(type, DM_CTYPE_FIBRE) == 0) { - - boolean_t ismpxio = B_FALSE; - - if ((error = hba_is_multiplex(hba, &ismpxio)) == 0) { - if (ismpxio) { - lists[HBA_FIBRE_MPXIO] = - dlist_append(item, - lists[HBA_FIBRE_MPXIO], AT_TAIL); - } else { - lists[HBA_FIBRE] = - dlist_append(item, - lists[HBA_FIBRE], AT_TAIL); - } - } else { - free(item); - } - - } else if (strcmp(type, DM_CTYPE_SCSI) == 0) { - - /* determine subtype */ - boolean_t iswide = B_FALSE; - boolean_t ismpxio = B_FALSE; - boolean_t is80 = B_FALSE; - boolean_t is40 = B_FALSE; - boolean_t is20 = B_FALSE; - - ((error = hba_supports_wide(hba, &iswide)) != 0) || - (error = hba_is_multiplex(hba, &ismpxio)) || - (error = hba_is_fast_80(hba, &is80)) || - (error = hba_is_fast_40(hba, &is40)) || - (error = hba_is_fast_20(hba, &is20)); - - if (error == 0) { - - if (ismpxio) { - - lists[HBA_SCSI_MPXIO] = - dlist_append(item, - lists[HBA_SCSI_MPXIO], AT_TAIL); - - } else if (is80) { - - if (iswide) { - lists[HBA_SCSI_FW80] = - dlist_append(item, - lists[HBA_SCSI_FW80], AT_TAIL); - } else { - lists[HBA_SCSI_F80] = - dlist_append(item, - lists[HBA_SCSI_F80], AT_TAIL); - } - - } else if (is40) { - - if (iswide) { - lists[HBA_SCSI_FW40] = - dlist_append(item, - lists[HBA_SCSI_FW40], AT_TAIL); - } else { - lists[HBA_SCSI_F40] = - dlist_append(item, - lists[HBA_SCSI_F40], AT_TAIL); - } - - } else if (is20) { - - if (iswide) { - lists[HBA_SCSI_FW20] = - dlist_append(item, - lists[HBA_SCSI_FW20], AT_TAIL); - } else { - lists[HBA_SCSI_F20] = - dlist_append(item, - lists[HBA_SCSI_F20], AT_TAIL); - } - - } else { - lists[HBA_SCSI] = - dlist_append(item, lists[HBA_SCSI], AT_TAIL); - } - - } else { - free(item); - } - - } else if (strcmp(type, DM_CTYPE_ATA) == 0) { - lists[HBA_ATA] = - dlist_append(item, lists[HBA_ATA], AT_TAIL); - } else if (strcmp(type, DM_CTYPE_USB) == 0) { - lists[HBA_USB] = - dlist_append(item, lists[HBA_USB], AT_TAIL); - } else if (strcmp(type, DM_CTYPE_UNKNOWN) == 0) { - oprintf(OUTPUT_DEBUG, - gettext("found an HBA with unknown type\n")); - free(item); - } - } - - if (error == 0) { - /* collect individual lists into a list of lists */ - for (i = 0; (i < HBA_LAST) && (error == 0); i++) { - if (lists[i] != NULL) { - if ((item = dlist_new_item(lists[i])) == NULL) { - error = ENOMEM; - } else { - groups = dlist_append(item, groups, AT_TAIL); - } - } - } - } - - if (error != 0) { - for (i = 0; i < HBA_LAST; i++) { - dlist_free_items(lists[i], NULL); - lists[i] = NULL; - } - - if (groups != NULL) { - dlist_free_items(groups, NULL); - } - } - - *list = groups; - - return (error); -} - -/* - * FUNCTION: hba_group_usable_disks(dm_descriptor_t hba, dlist_t **list) - * - * INPUT: hba - a dm_descriptor_t handle for a slice - * - * OUTPUT: **list - a pointer to a list to hold the lists of disks - * grouped by characteristics. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Examine the disks assocated with the HBA and collates them - * into separate lists, grouped by similar characteristics. - * - * get disks on HBA - * check disks against _usable_disks list - * group disks by similarities: - * sync-speed uint32 - * wide boolean - * rpm uint32 - * - * XXX this function is currently unused. At some point, - * it may be useful to group disks by performance - * characteristics and use "better" disks before others. - */ -int -hba_group_usable_disks( - dm_descriptor_t hba, - dlist_t **list) -{ - dm_descriptor_t *disk = NULL; - char *name = NULL; - int i = 0; - int error = 0; - - disk = dm_get_associated_descriptors(hba, DM_DRIVE, &error); - (void) add_descriptors_to_free(disk); - - if (error != 0) { - print_get_assoc_desc_error(hba, gettext("drive"), error); - return (error); - } else if ((disk == NULL) || (*disk == NULL)) { - print_get_assoc_desc_error(hba, gettext("drive"), error); - error = -1; - } - - for (i = 0; (disk[i] != NULL) && (error == 0); i++) { - - uint32_t dtype = DM_DT_UNKNOWN; - dlist_t *usable = NULL; - - /* ignore non fixed media drives */ - if (((error = disk_get_drive_type(disk[i], &dtype)) != 0) || - (dtype != DM_DT_FIXED)) { - continue; - } - - if (dlist_contains(usable, &disk[i], - compare_descriptor_names) == B_TRUE) { - - uint64_t bsize = 0; - uint64_t ncyls = 0; - uint64_t nsects = 0; - uint64_t nheads = 0; - uint32_t rpm = 0; - uint32_t sync = 0; - - name = NULL; - ((error = get_display_name(disk[i], &name)) != 0) || - (error = disk_get_blocksize(disk[i], &bsize)) || - (error = disk_get_nheads(disk[i], &nheads)) || - (error = disk_get_nsectors(disk[i], &nsects)) || - (error = disk_get_ncylinders(disk[i], &ncyls)) || - (error = disk_get_rpm(disk[i], &rpm)) || - (error = disk_get_sync_speed(disk[i], &sync)); - if (error != 0) { - continue; - } - - oprintf(OUTPUT_VERBOSE, - gettext("found an available disk: %s\n\t" - "sync_speed = %u, rpm = %u, " - "nsect = %llu, blksiz = %llu\n"), - name, sync, rpm, nsects, bsize); - - /* add to the appropriate list */ - } - } - - if (disk != NULL) { - free(disk); - } - - return (error); -} - -/* - * FUNCTION: hba_get_n_avail_disks(dm_descriptor_t hba, uint16_t *val) - * hba_set_n_avail_disks(dm_descriptor_t hba, uint16_t val) - * - * INPUT: hba - a dm_descriptor_t handle for a slice - * - * OUTPUT: *val - a pointer to a uint16_t to hold the current number - * of available disks for the input HBA. - * - * RETURNS: int - 0 on success - * !0 otherwise. - */ -int -hba_set_n_avail_disks( - dm_descriptor_t hba, - uint16_t val) -{ - nvlist_t *attrs; - int error = 0; - - ((error = get_cached_attributes(hba, &attrs)) != 0) || - (error = set_uint16(attrs, ATTR_HBA_N_DISKS, val)); - - return (error); -} - -int -hba_get_n_avail_disks( - dm_descriptor_t hba, - uint16_t *val) -{ - nvlist_t *attrs; - int error = 0; - - *val = 0; - - ((error = get_cached_attributes(hba, &attrs)) != 0) || - (error = get_uint16(attrs, ATTR_HBA_N_DISKS, val)); - - return (error); -} - -/* - * FUNCTION: hba_get_type(dm_descriptor_t hba, char **type) - * - * INPUT: hba - a dm_descriptor_t handle for a HBA - * - * OUTPUT: **type - a char * to hold the current type value for - * the HBA. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Retrieves the type attribute for the HBA. - */ -int -hba_get_type( - dm_descriptor_t hba, - char **type) -{ - nvlist_t *attrs; - int error = 0; - - *type = NULL; - - ((error = get_cached_attributes(hba, &attrs)) != 0) || - (error = get_string(attrs, DM_CTYPE, type)); - - return (error); -} - -/* - * FUNCTION: hba_is_fast(dm_descriptor_t hba, boolean_t *bool) - * hba_is_fast20(dm_descriptor_t hba, boolean_t *bool) - * hba_is_fast40(dm_descriptor_t hba, boolean_t *bool) - * hba_is_fast80(dm_descriptor_t hba, boolean_t *bool) - * hba_is_multiplex(dm_descriptor_t hba, boolean_t *bool) - * hba_is_wide(dm_descriptor_t hba, boolean_t *bool) - * - * INPUT: hba - a dm_descriptor_t handle for a HBA - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the - * boolean value of the predicate. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrappers around hba_supports_protocol which determines - * if the input HBA supports the protocol of interest. - */ -int -hba_is_fast( - dm_descriptor_t hba, - boolean_t *bool) -{ - return (hba_supports_protocol(hba, DM_FAST, bool)); -} - -int -hba_is_fast_20( - dm_descriptor_t hba, - boolean_t *bool) -{ - return (hba_supports_protocol(hba, DM_FAST20, bool)); -} - -int -hba_is_fast_40( - dm_descriptor_t hba, - boolean_t *bool) -{ - return (hba_supports_protocol(hba, DM_FAST40, bool)); -} - -int -hba_is_fast_80( - dm_descriptor_t hba, - boolean_t *bool) -{ - return (hba_supports_protocol(hba, DM_FAST80, bool)); -} - -int -hba_is_multiplex( - dm_descriptor_t hba, - boolean_t *bool) -{ - return (hba_supports_protocol(hba, DM_MULTIPLEX, bool)); -} - -int -hba_supports_wide( - dm_descriptor_t hba, - boolean_t *bool) -{ - nvlist_t *attrs = NULL; - int error = 0; - - *bool = B_FALSE; - - if ((error = get_cached_attributes(hba, &attrs)) != 0) { - return (error); - } - - *bool = (0 == nvlist_lookup_boolean(attrs, DM_WIDE)); - - return (error); -} - -/* - * FUNCTION: hba_supports_protocol(dm_descriptor_t hba, char *attr, - * boolean_t *bool) - * - * INPUT: hba - a dm_descriptor_t handle for a HBA - * attr - a protocol "name" - * - * OUTPUT: *bool - a pointer to a boolean_t to hold the - * boolean value of the predicate. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Checks the HBAs attributes to see if it is known to - * support the protocol of interest. - * - * If the protocol is supported, it will have an entry - * in the nvpair attribute list that can be retrieved. - * - * If the entry cannot be retrieved, the protocol is not - * supported. - */ -int -hba_supports_protocol( - dm_descriptor_t hba, - char *attr, - boolean_t *bool) -{ - nvlist_t *attrs = NULL; - int error = 0; - - *bool = B_FALSE; - - if ((error = get_cached_attributes(hba, &attrs)) != 0) { - return (error); - } - - *bool = (0 == nvlist_lookup_boolean(attrs, attr)); - - return (error); -} - -/* - * FUNCTION: slice_set_size(dm_descriptor_t slice, uint64_t size) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: size - a uint64_t value representing the size of the - * slice. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrapper around slice_set_uint64_attribute which converts - * the input size in bytes to blocks prior to storing it. - * - * This function is used when an existing slice gets resized - * to provide space for a new slice. It is necessary to update - * the slice's size so that it is accurate. - */ -int -slice_set_size( - dm_descriptor_t slice, - uint64_t size) -{ - dm_descriptor_t disk = NULL; - uint64_t blksize = 0; - int error = 0; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = disk_get_blocksize(disk, &blksize)) || - (error = slice_set_size_in_blocks(slice, (uint64_t)(size / blksize))); - - return (error); -} - -/* - * FUNCTION: slice_set_size_in_blocks(dm_descriptor_t slice, uint64_t size) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: size - a uint64_t value representing the size of the - * slice. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrapper around slice_set_uint64_attribute to set the slice - * size. - * - * This function is used when an existing slice gets resized - * to provide space for a new slice. It is necessary to update - * the slice's size so that it is accurate. - */ -int -slice_set_size_in_blocks( - dm_descriptor_t slice, - uint64_t size) -{ - return (slice_set_attribute(slice, DM_SIZE, size)); -} - -/* - * FUNCTION: slice_set_start_block(dm_descriptor_t slice, uint64_t start) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: size - a uint64_t value representing the start block of the - * slice. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrapper around slice_set_attribute. - * - * This function is used when an existing slice gets adjusted - * due to being resized or combined with another slice. - */ -int -slice_set_start_block( - dm_descriptor_t slice, - uint64_t start) -{ - return (slice_set_attribute(slice, DM_START, start)); -} - -/* - * FUNCTION: slice_get_start_block(dm_descriptor_t slice, uint64_t *val) - * slice_get_size_in_blocks(dm_descriptor_t slice, uint64_t *val) - * slice_get_start(dm_descriptor_t slice, uint64_t *val) - * slice_get_size(dm_descriptor_t slice, uint64_t *val) - * slice_get_index(dm_descriptor_t slice, uint64_t *val) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: *val - a pointer to a uint64_t to hold the - * current value of the desired attribute. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Wrappers around slice_get_uint64_attribute which retrieve - * specific attribute values. - */ -int -slice_get_start_block( - dm_descriptor_t slice, - uint64_t *val) -{ - return (slice_get_uint64_attribute(slice, DM_START, val)); -} - -int -slice_get_size_in_blocks( - dm_descriptor_t slice, - uint64_t *val) -{ - return (slice_get_uint64_attribute(slice, DM_SIZE, val)); -} - -int -slice_get_start( - dm_descriptor_t slice, - uint64_t *val) -{ - dm_descriptor_t disk = NULL; - uint64_t blksize = 0; - uint64_t nblks = 0; - int error = 0; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = disk_get_blocksize(disk, &blksize)) || - (error = slice_get_start_block(slice, &nblks)); - - if (error == 0) { - *val = (blksize * nblks); - } - - return (error); -} - -int -slice_get_size( - dm_descriptor_t slice, - uint64_t *val) -{ - dm_descriptor_t disk = NULL; - uint64_t blksize = 0; - uint64_t nblks = 0; - int error = 0; - - *val = 0; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = slice_get_size_in_blocks(slice, &nblks)) || - (error = disk_get_blocksize(disk, &blksize)); - - if (error == 0) { - *val = (blksize * nblks); - } - - return (error); -} - -int -slice_get_index( - dm_descriptor_t slice, - uint32_t *val) -{ - uint64_t index = 0; - int error = 0; - - if ((error = slice_get_uint64_attribute( - slice, DM_INDEX, &index)) != 0) { - return (error); - } - - *val = (uint32_t)index; - - return (0); -} - -/* - * FUNCTION: slice_set_uint64_attribute(dm_descriptor_t slice, - * char *attr, uint64_t val) - * slice_get_uint64_attribute(dm_descriptor_t slice, - * char *attr, uint64_t *val) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * attr - a char * attribute name - * val - auint64_t value - * - * OUTPUT: *val - a pointer to a uint64_t to hold the - * current value of the named attribute. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helpers to set/get the value for a slice's attribute. - * - * Consolidate the details of getting/setting slice - * attributes. Some attributes are actually stored as - * uint32_t or uint16_t values, these functions mask - * the type conversions. - */ -static int -slice_get_uint64_attribute( - dm_descriptor_t slice, - char *attr, - uint64_t *val) -{ - nvlist_t *attrs = NULL; - uint32_t ui32 = 0; - int error = 0; - - if ((error = get_cached_attributes(slice, &attrs)) != 0) { - return (error); - } - - if (strcmp(attr, DM_INDEX) == 0) { - error = get_uint32(attrs, attr, &ui32); - *val = (uint64_t)ui32; - } else if (strcmp(attr, DM_START) == 0) { - error = get_uint64(attrs, attr, val); - } else if (strcmp(attr, DM_SIZE) == 0) { - error = get_uint64(attrs, attr, val); - } else if (strcmp(attr, ATTR_DISK_FOR_SLICE) == 0) { - error = get_uint64(attrs, attr, val); - } - - if (error != 0) { - print_get_desc_attr_error(slice, "slice", attr, error); - } - - return (error); -} - -/* - * Set a slice attribute. The attribute is only set in the cached - * copy of the slice's nvpair attribute list. This function does - * NOT affect the underlying physical device. - */ -static int -slice_set_attribute( - dm_descriptor_t slice, - char *attr, - uint64_t val) -{ - nvlist_t *attrs = NULL; - int error = 0; - - if ((error = get_cached_attributes(slice, &attrs)) != 0) { - return (error); - } - - if (strcmp(attr, DM_INDEX) == 0) { - error = set_uint32(attrs, attr, (uint32_t)val); - } else if (strcmp(attr, DM_START) == 0) { - error = set_uint64(attrs, attr, val); - } else if (strcmp(attr, DM_SIZE) == 0) { - error = set_uint64(attrs, attr, val); - } else if (strcmp(attr, ATTR_DISK_FOR_SLICE) == 0) { - error = set_uint64(attrs, attr, val); - } - - if (error != 0) { - print_set_desc_attr_error(slice, "slice", attr, error); - } - - return (error); -} - -/* - * FUNCTION: virtual_slice_get_disk(dm_descriptor_t slice, - * dm_descriptor_t *diskp) - * - * INPUT: slice - a dm_descriptor_t virtual slice handle - * diskp - pointer to a dm_descriptor_t disk handle - * to return the slice's disk - * - * OUTPUT: the disk associated with the virtual slice. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which determines the disk that the input virtual - * slice "belongs" to. - * - * The virtual slice's disk is stored in the slice's nvpair - * attribute list when the slice gets created. - */ -static int -virtual_slice_get_disk( - dm_descriptor_t slice, - dm_descriptor_t *diskp) -{ - uint64_t disk = 0; - int error = 0; - - if ((error = slice_get_uint64_attribute( - slice, ATTR_DISK_FOR_SLICE, &disk)) != 0) { - return (error); - } - - *diskp = (dm_descriptor_t)disk; - - if (disk == 0) { - print_get_desc_attr_error(slice, "virtual slice", "disk", error); - return (-1); - } - - return (0); -} - -/* - * FUNCTION: slice_get_disk(dm_descriptor_t disk, dm_descriptor_t *diskp) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: diskp - a pointer to a dm_descriptor_t to hold the - * disk associated with the input slice - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the disk for a slice device. - * - * A slice is actually connected to its disk thru an intermediate - * device known as the "media". The media concept exists to - * model drives with removeable disk media. For the purposes - * of layout, such devices aren't relevant and the intermediate - * media can mostly be ignored. - */ -int -slice_get_disk( - dm_descriptor_t slice, - dm_descriptor_t *diskp) -{ - dm_descriptor_t *media = NULL; - - int i = 0; - int error = 0; - - *diskp = 0; - - if (is_virtual_slice(slice)) { - return (virtual_slice_get_disk(slice, diskp)); - } - - media = dm_get_associated_descriptors(slice, DM_MEDIA, &error); - (void) add_descriptors_to_free(media); - - if (error != 0) { - print_get_assoc_desc_error(slice, gettext("media"), error); - } else if ((media == NULL) || (*media == NULL)) { - print_get_assoc_desc_error(slice, gettext("media"), error); - error = -1; - } - - if (error != 0) { - return (error); - } - - /* slice should have exactly 1 media */ - for (i = 0; (media[i] != NULL) && (*diskp == NULL); i++) { - /* get disk from media */ - dm_descriptor_t *disks = NULL; - disks = dm_get_associated_descriptors(media[i], DM_DRIVE, &error); - (void) add_descriptors_to_free(disks); - - if ((error == 0) && (disks != NULL) && (disks[0] != NULL)) { - *diskp = disks[0]; - } - free(disks); - } - - if (media != NULL) { - free(media); - } - - if (*diskp == 0) { - print_get_desc_attr_error(slice, - gettext("slice"), gettext("disk"), ENODEV); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: slice_get_hbas(dm_descriptor_t slice, dlist_t **list) - * - * INPUT: slice - a dm_descriptor_t handle for a slice - * - * OUTPUT: list - a pointer to a dlist_t list to hold the - * HBAs associated with the input slice - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the known HBAs for a slice device. - * - */ -int -slice_get_hbas( - dm_descriptor_t slice, - dlist_t **list) -{ - dm_descriptor_t disk = NULL; - int error = 0; - - *list = NULL; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = disk_get_hbas(disk, list)); - - if (*list == NULL) { - print_get_desc_attr_error(slice, "slice", "HBA", ENODEV); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: disk_get_associated_desc(dm_descriptor_t disk, - * dm_desc_type_t assoc_type, char *assoc_type_str, - * dlist_t **list) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * assoc_type - the type of associated object to get - * assoc_type_str - a char * string for the associated type - * - * OUTPUT: list - a pointer to a dlist_t list to hold the - * objects associated with the input disk - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the associated objects of the - * requested type for a disk device. - */ -static int -disk_get_associated_desc( - dm_descriptor_t disk, - dm_desc_type_t assoc_type, - char *assoc_type_str, - dlist_t **list) -{ - int i = 0; - int error = 0; - - dm_descriptor_t *assoc = - dm_get_associated_descriptors(disk, assoc_type, &error); - - (void) add_descriptors_to_free(assoc); - - if (error == 0) { - for (i = 0; - (assoc != NULL) && (assoc[i] != NULL) && (error == 0); - i++) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)assoc[i]); - if (item == NULL) { - error = ENOMEM; - } else { - *list = dlist_append(item, *list, AT_TAIL); - } - } - } else { - print_get_assoc_desc_error(disk, assoc_type_str, error); - } - - if (assoc != NULL) { - free(assoc); - } - - if (error != 0) { - dlist_free_items(*list, NULL); - *list = NULL; - } - - return (error); -} - -/* - * FUNCTION: disk_get_hbas(dm_descriptor_t disk, dlist_t **list) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: list - a pointer to a dlist_t list to hold the - * HBAs associated with the input disk - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the known HBAs for a disk device. - * - */ -int -disk_get_hbas( - dm_descriptor_t disk, - dlist_t **list) -{ - return (disk_get_associated_desc(disk, DM_CONTROLLER, - gettext("controller"), list)); -} - -/* - * FUNCTION: disk_get_paths(dm_descriptor_t disk, dlist_t **list) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: list - a pointer to a dlist_t list to hold the - * paths associated with the input disk - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the known paths for a disk device. - * - * Paths are managed by the MPXIO driver, they represent hardware - * paths to the disk drive managed by the MPXIO and not visible - * externally, unlike aliases which are. - */ -int -disk_get_paths( - dm_descriptor_t disk, - dlist_t **list) -{ - return (disk_get_associated_desc(disk, DM_PATH, - gettext("path"), list)); -} - -/* - * FUNCTION: disk_get_aliases(dm_descriptor_t disk, dlist_t **list) - * - * INPUT: disk - a dm_descriptor_t handle for a disk - * - * OUTPUT: list - a pointer to a dlist_t list to hold the - * alias descriptors associated with the input disk - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves the known aliases for a disk device. - * - * Aliases are the different CTD names for the disk drive when - * MPXIO is not enabled for multipathed drives. - */ -int -disk_get_aliases( - dm_descriptor_t disk, - dlist_t **list) -{ - return (disk_get_associated_desc(disk, DM_ALIAS, - gettext("alias"), list)); -} - -/* - * FUNCTION: compare_string_to_desc_name_or_alias( - * void *str, void *desc) - * - * INPUT: str - opaque pointer - * descr - opaque pointer - * - * RETURNS: int - <0 - if str < desc.name - * 0 - if str == desc.name - * >0 - if str > desc.name - * - * PURPOSE: dlist_t helper which compares a string to the name - * and aliases associated with the input dm_descriptor_t - * handle. - * - * Comparison is done via compare_device_names. - */ -static int -compare_string_to_desc_name_or_alias( - void *str, - void *desc) -{ - char *dname = NULL; - int result = -1; - - assert(str != (char *)NULL); - assert(desc != (dm_descriptor_t)0); - - (void) get_display_name((uintptr_t)desc, &dname); - - /* try name first, then aliases */ - if ((result = compare_device_names(str, dname)) != 0) { - dlist_t *aliases = NULL; - - (void) get_aliases((uintptr_t)desc, &aliases); - if ((aliases != NULL) && (dlist_contains(aliases, - str, compare_device_names) == B_TRUE)) { - result = 0; - } - dlist_free_items(aliases, free); - } - - return (result); -} - -/* - * FUNCTION: hba_get_by_name(char *name, dm_descriptor_t *hba) - * - * INPUT: name - a char * disk name - * - * OUTPUT: hba - a pointer to a dm_descriptor_t to hold the - * HBA corresponding to the input name, if found - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which iterates the known HBAs, searching for - * the one matching name. - * - * If no HBA matches the name, 0 is returned and the - * value of 'hba' will be (dm_descriptor_t)0; - */ -int -hba_get_by_name( - char *name, - dm_descriptor_t *hba) -{ - int error = 0; - dlist_t *list = NULL; - dlist_t *item = NULL; - - *hba = (dm_descriptor_t)0; - - if (name == NULL) { - return (0); - } - - if ((error = get_known_hbas(&list)) != 0) { - return (error); - } - - if ((item = dlist_find(list, name, - compare_string_to_desc_name_or_alias)) != NULL) { - *hba = (uintptr_t)item->obj; - } - - return (error); -} - -/* - * FUNCTION: disk_get_by_name(char *name, dm_descriptor_t *disk) - * - * INPUT: name - a char * disk name - * - * OUTPUT: disk - a pointer to a dm_descriptor_t to hold the - * disk corresponding to the input name, if found - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which retrieves a dm_descriptor_t disk handle - * by name. - * - * If no disk is found for the input name, variations of - * the name are tried. - * - * If the input name is unqualified, an appropriate leading - * path is prepended. - * - * If the input name is qualified, the leading path is - * removed. - * - * If no disk is found for the variations, 0 is returned - * and the value of 'disk' will be (dm_descriptor_t)0; - */ -int -disk_get_by_name( - char *name, - dm_descriptor_t *disk) -{ - assert(name != (char *)NULL); - - *disk = find_cached_descriptor(name); - if (*disk == (dm_descriptor_t)0) { - if (name[0] == '/') { - /* fully qualified, try unqualified */ - char *cp = strrchr(name, '/'); - if (cp != NULL) { - *disk = find_cached_descriptor(cp + 1); - } - } else { - /* unqualified, try fully qualified */ - char buf[MAXNAMELEN+1]; - if (is_ctd_disk_name(name)) { - (void) snprintf(buf, MAXNAMELEN, "/dev/dsk/%s", name); - } else if (is_did_disk_name(name)) { - (void) snprintf(buf, MAXNAMELEN, "/dev/did/dsk/%s", name); - } - *disk = find_cached_descriptor(buf); - } - } - - /* - * since the descriptor cache includes HBAs, disks and slices, - * what gets returned may not be a disk... make sure it is - */ - if (*disk != (dm_descriptor_t)0) { - if (dm_get_type(*disk) != DM_DRIVE) { - *disk = (dm_descriptor_t)0; - } - } - - return (0); -} - -/* - * FUNCTION: slice_get_by_name(char *name, dm_descriptor_t *slice) - * - * INPUT: name - a char * slice name - * - * OUTPUT: slice - a pointer to a dm_descriptor_t to hold the - * slice corresponding to the input name, if found. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which iterates the known slices, searching for - * the one matching name. - * - * If no slice is found for the input name, variations of - * the name are tried. - * - * If the input name is unqualified, an appropriate leading - * path is prepended. - * - * If the input name is qualified, the leading path is - * removed. - * - * If no slice matches the variations, 0 is returned and the - * value of 'slice' will be (dm_descriptor_t)0; - */ -int -slice_get_by_name( - char *name, - dm_descriptor_t *slice) -{ - assert(name != (char *)NULL); - - *slice = find_cached_descriptor(name); - if (*slice == (dm_descriptor_t)0) { - if (name[0] == '/') { - /* fully qualified, try unqualified */ - char *cp = strrchr(name, '/'); - if (cp != NULL) { - *slice = find_cached_descriptor(cp + 1); - } - } else { - /* unqualified, try fully qualified */ - char buf[MAXNAMELEN+1]; - if (is_ctd_slice_name(name) || is_ctd_like_slice_name(name) || - is_bsd_like_slice_name(name)) { - (void) snprintf(buf, MAXNAMELEN, "/dev/dsk/%s", name); - } else if (is_did_slice_name(name)) { - (void) snprintf(buf, MAXNAMELEN, "/dev/did/dsk/%s", name); - } - *slice = find_cached_descriptor(buf); - } - } - - /* - * since the descriptor cache includes HBAs, disks and slices, - * what gets returned may not be a slice... make sure it is - */ - if (*slice != (dm_descriptor_t)0) { - if (dm_get_type(*slice) != DM_SLICE && - is_virtual_slice(*slice) != B_TRUE) { - *slice = (dm_descriptor_t)0; - } - } - - return (0); -} - -/* - * FUNCTION: extract_hbaname(char *name, char **hbaname) - * - * INPUT: slicename - a char * device name - * - * OUTPUT: hbaname - a pointer to a char * to hold the hbaname derived - * from the input name. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which extracts the HBA name from the input name. - * - * If the input name is in ctd form, extracts just the cX part, - * by truncating everything following the last 't'. - * - * Of course on X86, with IDE drives, there is no 't' in the - * ctd name, so start by truncating everything following 'd' - * and then look for 't'. - * - * The returned string must be passed to free(). - */ -int -extract_hbaname( - char *name, - char **hbaname) -{ - char *cp; - - if (is_ctd_name(name)) { - if ((*hbaname = strdup(name)) == NULL) { - return (ENOMEM); - } - if ((cp = strrchr(*hbaname, 'd')) != NULL) { - *cp = '\0'; - } - if ((cp = strrchr(*hbaname, 't')) != NULL) { - *cp = '\0'; - } - } - - return (0); -} - -/* - * FUNCTION: extract_diskname(char *slicename, char **diskname) - * - * INPUT: slicename - a char * slice name - * - * OUTPUT: diskname - a pointer to a char * to hold the diskname derived - * from the input slicename. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which extracts the disk's name from a slice name. - * - * Checks to see if the input slicename is in ctd or did form, - * and if so, truncates everything following the last 's'. - * - * If the input slicename is BSD-like, truncate the last - * character (a-h). - * - * The returned string must be passed to free(). - */ -int -extract_diskname( - char *slicename, - char **diskname) -{ - char *cp; - - if (is_ctd_slice_name(slicename) || is_did_slice_name(slicename) || - is_ctd_like_slice_name(slicename)) { - - if ((*diskname = strdup(slicename)) == NULL) { - return (ENOMEM); - } - if ((cp = strrchr(*diskname, 's')) != NULL) { - *cp = '\0'; - } - - } else if (is_bsd_like_slice_name(slicename)) { - - if ((*diskname = strdup(slicename)) == NULL) { - return (ENOMEM); - } - (*diskname)[strlen((*diskname)-1)] = '\0'; - - } - - return (0); -} - -/* - * FUNCTION: get_disk_for_named_slice(char *slicename, - * dm_descriptor_t disk) - * - * INPUT: slicename - a char * slice name - * - * OUTPUT: disk - a pointer to a dm_descriptor_t to hold the - * disk corresponding to the input name, if found - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which locates the disk dm_descriptor_t handle for - * the input slice name. - * - * If no disk matches the name, 0 is returned and the - * value of 'disk' will be (dm_descriptor_t)0; - */ -int -get_disk_for_named_slice( - char *slicename, - dm_descriptor_t *disk) -{ - dm_descriptor_t slice = (dm_descriptor_t)0; - int error = 0; - - assert(slicename != NULL); - - /* find disk for slice */ - if ((error = slice_get_by_name(slicename, &slice)) == 0) { - - if (slice != (dm_descriptor_t)0) { - error = slice_get_disk(slice, disk); - } else { - /* named slice was created by layout: */ - /* need to find disk by name */ - char *dname; - - error = extract_diskname(slicename, &dname); - if (error == 0) { - error = disk_get_by_name(dname, disk); - } - free(dname); - } - } - - assert(*disk != (dm_descriptor_t)0); - - return (error); -} - -/* - * FUNCTION: disk_get_reserved_indexes(dm_descriptor_t disk, - * uint16_t **array) - * - * INPUT: disk - a dm_descriptor_t disk handle - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Retrieves the input disk's list of reserved slice indices. - * - * The list of reserved indices is stored as an array in - * the disk's nvpair attribute list. - */ -static int -disk_get_reserved_indexes( - dm_descriptor_t disk, - uint16_t **array) -{ - nvlist_t *attrs = NULL; - uint_t nelem = 0; - int error = 0; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint16_array( - attrs, ATTR_RESERVED_INDEX, array, &nelem)) != 0) { - if (error == ENOENT) { - /* no reserved indices yet */ - error = 0; - } - } - - return (error); -} - -/* - * FUNCTION: disk_reserve_index(dm_descriptor_t disk, uint16_t index) - * - * INPUT: disk - a disk dm_descirptor_t handle - * undex - a VTOC slice index - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Reserves the input VTOC slice index for the input disk. - * - * The list of reserved indices is stored as an array in - * the disk's nvpair attribute list. - */ -int -disk_reserve_index( - dm_descriptor_t disk, - uint16_t index) -{ - nvlist_t *attrs = NULL; - uint16_t *oldindexes = NULL; - uint16_t *newindexes = NULL; - uint_t nelem = 0; - int error = 0; - int i = 0; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint16_array( - attrs, ATTR_RESERVED_INDEX, &oldindexes, &nelem)) != 0) { - if (error != ENOENT) { - return (error); - } - /* no reserved indices yet */ - error = 0; - } - - /* add new index */ - newindexes = (uint16_t *)calloc(VTOC_SIZE, sizeof (uint16_t)); - if (newindexes != NULL) { - for (i = 0; i < nelem; i++) { - newindexes[i] = oldindexes[i]; - } - newindexes[(int)index] = 1; - - error = set_uint16_array(attrs, ATTR_RESERVED_INDEX, - newindexes, VTOC_SIZE); - - free(newindexes); - } else { - error = ENOMEM; - } - return (error); -} - -/* - * FUNCTION: disk_release_index(dm_descriptor_t disk, uint16_t index) - * - * INPUT: disk - a disk dm_descirptor_t handle - * undex - a VTOC slice index - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Releases the input VTOC slice index for the input disk. - * The index was previously reserved by disk_reserve_index() - */ -int -disk_release_index( - dm_descriptor_t disk, - uint16_t index) -{ - nvlist_t *attrs = NULL; - uint16_t *oldindexes = NULL; - uint16_t *newindexes = NULL; - uint_t nelem = 0; - int error = 0; - int i = 0; - - if ((error = get_cached_attributes(disk, &attrs)) != 0) { - return (error); - } - - if ((error = get_uint16_array( - attrs, ATTR_RESERVED_INDEX, &oldindexes, &nelem)) != 0) { - if (error != ENOENT) { - return (error); - } - error = 0; - } - - newindexes = (uint16_t *)calloc(VTOC_SIZE, sizeof (uint16_t)); - if (newindexes != NULL) { - for (i = 0; i < nelem; i++) { - newindexes[i] = oldindexes[i]; - } - - /* release index */ - newindexes[(int)index] = 0; - - error = set_uint16_array(attrs, ATTR_RESERVED_INDEX, - newindexes, VTOC_SIZE); - - free(newindexes); - } else { - error = ENOMEM; - } - - return (error); -} - -/* - * FUNCTION: print_get_assoc_desc_error(dm_descriptor_t desc, char *which, - * int error) - * - * INPUT: desc - a dm_descriptor_t handle - * which - a char * indicating which association - * error - an integer error value - * - * PURPOSE: Utility function to print an error message for a failed - * call to dm_get_associated_descriptors(). - * - * Extracts the device's CTD name and formats an error message. - */ -void -print_get_assoc_desc_error( - dm_descriptor_t desc, - char *which, - int error) -{ - char *name = ""; - - (void) get_display_name(desc, &name); - oprintf(OUTPUT_TERSE, - gettext("dm_get_associated_descriptors(%s) for " - "'%s' failed: %d\n"), - which, name, error); - - volume_set_error( - gettext("Unexpected error getting associated " - "descriptors for '%s'"), - name); -} - -/* - * FUNCTION: print_get_desc_attr_error(dm_descriptor_t desc, - * char *devtype, char *attr, int error) - * - * INPUT: desc - a dm_descriptor_t handle - * devtype - a char * device type that's being accessed - * attr - a char * attribute name - * error - an integer error value - * - * PURPOSE: Shared utility function to print an error message for a failed - * call to retrieve an attribute for a descriptor. - * - * Extracts the device's CTD name and formats an error message. - */ -void -print_get_desc_attr_error( - dm_descriptor_t desc, - char *devtype, - char *attr, - int error) -{ - char *name = ""; - - (void) get_display_name(desc, &name); - oprintf(OUTPUT_TERSE, - gettext("'%s' get attribute (%s.%s) error: %d\n"), - name, devtype, attr, error); - - volume_set_error( - gettext("Unexpected error getting attribute '%s.%s' for '%s'"), - devtype, attr, name); -} - -/* - * FUNCTION: print_set_desc_attr_error(dm_descriptor_t desc, - * char *devtype, char *attr, int error) - * - * INPUT: desc - a dm_descriptor_t handle - * devtype - a char * device type that's being accessed - * attr - a char * attribute name - * error - an integer error value - * - * PURPOSE: Shared utility function to print an error message for a failed - * call to set an attribute for a descriptor. - * - * Extracts the device's CTD name and formats an error message. - */ -void -print_set_desc_attr_error( - dm_descriptor_t desc, - char *devtype, - char *attr, - int error) -{ - char *name = ""; - - (void) get_display_name(desc, &name); - oprintf(OUTPUT_TERSE, - gettext("'%s' set attribute (%s.%s) error: %d\n"), - name, devtype, attr, error); - - volume_set_error( - gettext("Unexpected error setting attribute '%s.%s' for '%s'"), - devtype, attr, name); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_device_util.h b/usr/src/cmd/lvm/metassist/layout/layout_device_util.h deleted file mode 100644 index d8970ac5d54f..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_device_util.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_DEVICE_UTIL_H -#define _LAYOUT_DEVICE_UTIL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -extern boolean_t is_alt_slice_name(char *name); -extern boolean_t is_did_name(char *name); -extern boolean_t is_did_slice_name(char *name); -extern boolean_t is_did_disk_name(char *name); -extern boolean_t is_ctd_name(char *name); -extern boolean_t is_ctd_slice_name(char *name); -extern boolean_t is_ctd_disk_name(char *name); -extern boolean_t is_ctd_target_name(char *name); -extern boolean_t is_ctd_ctrl_name(char *name); - -extern int set_display_name(dm_descriptor_t desc, char *name); -extern int get_display_name(dm_descriptor_t slice, char **name); - -extern int slice_get_by_name(char *name, dm_descriptor_t *slicep); -extern int disk_get_by_name(char *name, dm_descriptor_t *diskp); -extern int hba_get_by_name(char *name, dm_descriptor_t *hbap); - -extern int extract_diskname(char *slicename, char **diskname); -extern int extract_hbaname(char *slicename, char **hbaname); - -extern int get_disk_for_named_slice(char *slicename, - dm_descriptor_t *diskp); - -/* - * functions to manipulate devices - */ -extern int group_similar_hbas(dlist_t *hbas, dlist_t **list); -extern int hba_is_multiplex(dm_descriptor_t hba, boolean_t *bool); - -extern int hba_set_n_avail_disks(dm_descriptor_t hba, uint16_t val); -extern int hba_get_n_avail_disks(dm_descriptor_t hba, uint16_t *val); - -extern int hba_get_type(dm_descriptor_t hba, char **type); -extern int hba_is_fast(dm_descriptor_t hba, boolean_t *bool); -extern int hba_is_fast_20(dm_descriptor_t hba, boolean_t *bool); -extern int hba_is_fast_40(dm_descriptor_t hba, boolean_t *bool); -extern int hba_is_fast_80(dm_descriptor_t hba, boolean_t *bool); -extern int hba_supports_protocol( - dm_descriptor_t hba, char *attr, boolean_t *bool); -extern int hba_supports_wide(dm_descriptor_t hba, boolean_t *bool); - -extern int disk_get_available_slice_index( - dm_descriptor_t diskp, uint32_t *index); - -extern int disk_get_hbas(dm_descriptor_t disk, dlist_t **list); -extern int disk_get_paths(dm_descriptor_t disk, dlist_t **list); -extern int disk_get_slices(dm_descriptor_t disk, dlist_t **list); -extern int disk_get_aliases(dm_descriptor_t disk, dlist_t **list); -extern int disk_get_blocksize(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_ncylinders(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_size_in_blocks(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_start_block(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_nheads(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_nsectors(dm_descriptor_t disk, uint64_t *val); -extern int disk_get_is_efi(dm_descriptor_t disk, boolean_t *val); -extern int disk_get_has_fdisk(dm_descriptor_t disk, boolean_t *val); -extern int disk_get_has_solaris_partition(dm_descriptor_t disk, - boolean_t *val); -extern int disk_get_is_online(dm_descriptor_t disk, boolean_t *val); -extern int disk_get_drive_type(dm_descriptor_t disk, uint32_t *val); -extern int disk_get_media_type(dm_descriptor_t disk, uint32_t *type); -extern int disk_reserve_index(dm_descriptor_t disk, uint16_t index); -extern int disk_release_index(dm_descriptor_t disk, uint16_t index); - -extern int slice_get_hbas(dm_descriptor_t slice, dlist_t **list); -extern int slice_get_disk(dm_descriptor_t slice, dm_descriptor_t *diskp); -extern int slice_get_size(dm_descriptor_t slice, uint64_t *val); -extern int slice_get_index(dm_descriptor_t slice, uint32_t *val); -extern int slice_get_size_in_blocks(dm_descriptor_t slice, uint64_t *val); -extern int slice_get_start_block(dm_descriptor_t slice, uint64_t *val); -extern int slice_get_start(dm_descriptor_t slice, uint64_t *val); - -extern int slice_set_size(dm_descriptor_t slice, uint64_t size); -extern int slice_set_size_in_blocks(dm_descriptor_t slice, uint64_t size); -extern int slice_set_start_block(dm_descriptor_t slice, uint64_t start); - -/* - * virtual slice utilities. - */ -extern int create_virtual_slices(dlist_t *unused); -extern int add_virtual_slice(char *name, uint32_t index, - uint64_t startblk, uint64_t sizeblks, dm_descriptor_t disk); - -extern void release_virtual_slices(); -extern int get_virtual_slices(dlist_t **list); -extern boolean_t is_virtual_slice(dm_descriptor_t slice); - -/* - * shared error output functions for dm_descriptor_t objects - */ -extern void print_get_assoc_desc_error( - dm_descriptor_t desc, char *which, int error); -extern void print_get_desc_attr_error( - dm_descriptor_t desc, char *devtype, char *attr, int error); - -extern void print_set_desc_attr_error( - dm_descriptor_t desc, char *devtype, char *attr, int error); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_DEVICE_UTIL_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_discovery.c b/usr/src/cmd/lvm/metassist/layout/layout_discovery.c deleted file mode 100644 index 5441dec41c2f..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_discovery.c +++ /dev/null @@ -1,2458 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include - -#include - -#define _LAYOUT_DISCOVERY_C - -#include "volume_dlist.h" -#include "volume_error.h" -#include "volume_nvpair.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_dlist_util.h" -#include "layout_discovery.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_svm_util.h" - -/* - * lists of device dm_descriptor_t handles discovered during - * the initial system probe. Lists are populated by - * discover_known_devices. - * - * "bad" slices are those that are known to libdiskmgt but - * cannot be accessed. An example would be a slice that has - * disappeared due to disk re-slicing: libdiskmgt may have a - * cached handle for it, but the slice no longer exists. - * - * "bad" disks are thoese that are known to libdiskmgt but - * cannot be accessed. An example would be a disk that has - * failed or has gone offline: libdiskmgt may have a cached - * handle for it, but the disk does not respond. - */ -static dlist_t *_bad_slices = NULL; -static dlist_t *_bad_disks = NULL; - -static dlist_t *_known_slices = NULL; -static dlist_t *_known_disks = NULL; -static dlist_t *_known_hbas = NULL; - -/* - * helper functions for building known device lists, used by - * discover_known_devices. - */ -static int generate_known_slices(dlist_t *disks, dlist_t **known, - dlist_t **bad); -static int generate_known_disks(dlist_t **known, dlist_t **bad); -static int generate_known_hbas(dlist_t *disks, dlist_t **known); -static int generate_known_hba_name( - dm_descriptor_t hba, - dm_descriptor_t alias, - dm_descriptor_t disk); - -static void print_known_devices(); -static void print_device_list(dlist_t *devices); - -/* - * lists of device dm_descriptor_t handles that are usable by layout. - * These devices must still pass the user specified available/unavailable - * filter before they're actually considered available. - * - * Lists are populated by discover_usable_devices. - */ -static dlist_t *_usable_slices = NULL; -static dlist_t *_usable_disks = NULL; -static dlist_t *_usable_hbas = NULL; - -/* - * private flag that remembers if any HBA is known to support MPXIO - */ -static boolean_t _mpxio_enabled = B_FALSE; - -/* - * The slice_class struct is used to group slices by usage class. - */ -typedef struct { - char *usage; /* usage description */ - dlist_t *sliceinfo; /* list with info about each slice with usage */ -} slice_class_t; - -#define USE_DISKSET "diskset" - -static int check_slice_usage( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - boolean_t *avail, - dlist_t **bad, - dlist_t **classes); - -static int check_svm_slice_usage( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - boolean_t *avail, - dlist_t **classes); - -static int save_slice_classification( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - char *usage, - char *usage_detail, - dlist_t **classes); - -static int generate_usable_disks_and_slices_in_local_set( - dlist_t **classes, - dlist_t **bad_disks, - dlist_t **usable_disks, - dlist_t **usable_slices); - -static int generate_usable_disks_and_slices_in_named_set( - char *dsname, - dlist_t **classes, - dlist_t **bad_slices, - dlist_t **usable_disks, - dlist_t **usable_slices); - -static int create_usable_slices( - dm_descriptor_t disk, - dlist_t *used, - dlist_t *unused, - dlist_t **usable); - -static int add_new_usable( - dm_descriptor_t disk, - uint64_t stblk, - uint64_t nblks, - dlist_t **next_unused, - dlist_t **usable); - -static int update_slice_attributes( - dm_descriptor_t slice, - uint64_t stblk, - uint64_t nblks, - uint64_t nbytes); - -static int generate_usable_hbas( - dlist_t *disks, - dlist_t **usable); - -static void print_usable_devices(); - -static void print_unusable_devices( - dlist_t *badslices, - dlist_t *baddisks, - dlist_t *usedslices); - -static char *get_slice_usage_msg( - char *usage); - -/* - * virtual slices... - */ -static int generate_virtual_slices( - dlist_t *avail_disks_local_set, - dlist_t **usable); - -/* - * multipathed disks have aliases, as do slices on those disks. - * these need to be tracked since the user may specify them. - * A multi-pathed disk is one connected to the system thru - * more than one physical HBA, each connection gets a distinct - * name in the device tree and they're all more or less equivalent. - * No indication as to how many possible physical connections a - * disk may have, so we pick an arbitrary number of aliases to - * support. There is nothing significant about this number, - * it just controls the number of alias slots that get allocated. - */ -#define MAX_ALIASES 8 - -/* - * attribute name for layout private information stored in - * device nvpair attribute lists. - */ -static char *ATTR_DEVICE_ALIASES = "layout_device_aliases"; - -static int compare_start_blocks( - void *desc1, void *desc2); - -static int compare_desc_display_names( - void *desc1, void *desc2); - -/* - * FUNCTION: is_mpxio_enabled() - * - * RETURNS: boolean_t - B_TRUE - if MPXIO appears enabled for the system - * B_FALSE - otherwise - * - * PURPOSE: returns the value of _mpxio_enabled which is set to B_TRUE - * during system configuration discovery if any of the knwon - * HBAs advertises itself as a "multiplex" controller. - */ -boolean_t -is_mpxio_enabled() -{ - return (_mpxio_enabled); -} - -/* - * FUNCTION: discover_known_devices() - * - * SIDEEFFECT: populates the module private lists of known devices - * (_known_slices, _known_disks, _known_hbas). - * - * All known devices will also have had their CTD - * short names inferred and stored. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Load physical devices discovered thru libdiskmgt. - */ -int -discover_known_devices() -{ - int error = 0; - - oprintf(OUTPUT_TERSE, - gettext("\nScanning system physical " - "device configuration...\n")); - - /* initialize layout_device_cache */ - ((error = create_device_caches()) != 0) || - - (error = generate_known_disks(&_known_disks, &_bad_disks)) || - (error = generate_known_slices(_known_disks, &_known_slices, - &_bad_slices)) || - (error = generate_known_hbas(_known_disks, &_known_hbas)); - - if (error == 0) { - print_known_devices(); - } - - return (error); -} - -/* - * FUNCTION: release_known_devices() - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Unloads all state currently held for known - * physical devices. - */ -int -release_known_devices( - char *diskset) -{ - /* these lists are module private */ - dlist_free_items(_bad_slices, NULL); - dlist_free_items(_bad_disks, NULL); - dlist_free_items(_known_slices, NULL); - dlist_free_items(_known_disks, NULL); - dlist_free_items(_known_hbas, NULL); - - _bad_slices = NULL; - _bad_disks = NULL; - _known_slices = NULL; - _known_disks = NULL; - _known_hbas = NULL; - - /* clean up state kept in layout_device_cache */ - release_device_caches(); - - return (0); -} - -/* - * FUNCTION: discover_usable_devices(char *diskset) - * - * INPUT: diskset - a char * diskset name. - * - * SIDEEFFECT: Traverses the lists of known devices and populates the - * module private lists of usable devices (_usable_slices, - * _usable_disks, _usable_hbas), as well as the module - * private list of used slices. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Process the known devices and determine which of them are - * usable for generating volumes in the specified diskset. - * - * The specified diskset's name cannot be NULL or 0 length. - */ -int -discover_usable_devices( - char *diskset) -{ - int error = 0; - - dlist_t *used_classes = NULL; - dlist_t *iter = NULL; - - if (diskset == NULL || diskset[0] == '\0') { - volume_set_error( - gettext("a diskset name must be specified in " - "the request\n")); - return (-1); - } - - oprintf(OUTPUT_TERSE, - gettext("\nDetermining usable physical devices " - "for disk set \"%s\"...\n"), - diskset); - - error = generate_usable_disks_and_slices_in_local_set( - &used_classes, &_bad_slices, &_usable_disks, &_usable_slices); - if (error == 0) { - - error = generate_usable_disks_and_slices_in_named_set( - diskset, &used_classes, &_bad_slices, &_usable_disks, - &_usable_slices); - if (error == 0) { - - error = generate_usable_hbas(_usable_disks, &_usable_hbas); - if (error == 0) { - - print_usable_devices(); - print_unusable_devices( - _bad_slices, _bad_disks, used_classes); - } - } - } - - /* - * free slice classification usage and lists, items are char* - * the used_classes structure is only filled in if verbose - * output was requested. - */ - for (iter = used_classes; iter != NULL; iter = iter->next) { - slice_class_t *class = (slice_class_t *)iter->obj; - free(class->usage); - dlist_free_items(class->sliceinfo, free); - } - - dlist_free_items(used_classes, free); - return (error); -} - -/* - * FUNCTION: release_usable_devices() - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Unloads all state currently held for usable - * physical devices. - */ -int -release_usable_devices() -{ - /* list items are shared with _known_XXX lists */ - - dlist_free_items(_usable_slices, NULL); - dlist_free_items(_usable_disks, NULL); - dlist_free_items(_usable_hbas, NULL); - - _usable_slices = NULL; - _usable_disks = NULL; - _usable_hbas = NULL; - - /* clean up state kept in layout_device_util */ - release_virtual_slices(); - - return (0); -} - -/* - * FUNCTION: get_known_slices(dlist_t **list) - * get_known_disks(dlist_t **list) - * get_known_hbas(dlist_t **list) - * - * OUTPUT: list - a dlist_t pointer to hold the returned list of - * devices. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Public accessors for the module private lists of - * available devices. - */ -int -get_known_slices( - dlist_t **list) -{ - *list = _known_slices; - - return (0); -} - -int -get_known_disks( - dlist_t **list) -{ - *list = _known_disks; - - return (0); -} - -int -get_known_hbas( - dlist_t **list) -{ - *list = _known_hbas; - - return (0); -} - -/* make fully qualified DID device name */ -static char * -make_fully_qualified_did_device_name( - char *device) -{ - static char buf[MAXPATHLEN]; - - if (device != NULL && strrchr(device, '/') == NULL) { - (void) snprintf(buf, MAXPATHLEN-1, "%s/%s", - "/dev/did/dsk", device); - return (buf); - } - - return (device); -} - -/* - * FUNCTION: generate_known_disks(dlist_t **known, - * dlist_t **bad) - * - * INPUT: NONE - * - * OUTPUT: known - populated list of known disks - * bad - populated list of known bad disks - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Does the system configuration discovery to determine - * what disks are known to be attached to the system. - * - * Determines the CTD name for each disk and saves it. - */ -static int -generate_known_disks( - dlist_t **known, - dlist_t **bad) -{ - int i; - int error = 0; - dm_descriptor_t *ddp; - - ddp = dm_get_descriptors(DM_DRIVE, NULL, &error); - (void) add_descriptors_to_free(ddp); - - *known = NULL; - - if (error != 0) { - volume_set_error( - gettext("Error discovering system hardware configuration,\n" - "unable to communicate with libdiskmgt or diskmgtd.\n")); - return (-1); - } - - if ((ddp == NULL) || (ddp[0] == NULL)) { - volume_set_error(gettext("there are no known disks\n")); - return (-1); - } - - /* iterate all returned disks and add them to the known list */ - for (i = 0; (ddp[i] != NULL) && (error == 0); i++) { - dm_descriptor_t disk = (dm_descriptor_t)ddp[i]; - dlist_t *aliases = NULL; - uint32_t mtype = DM_MT_UNKNOWN; - uint32_t dtype = DM_DT_UNKNOWN; - boolean_t bad_disk = B_FALSE; - boolean_t online = B_TRUE; - -#if defined(i386) - /* on X86, disks must have a solaris FDISK partition */ - boolean_t solpart = B_FALSE; -#endif /* defined(i386) */ - - if (((error = disk_get_is_online(disk, &online)) == 0 && - online == B_FALSE) || error == ENODEV) { - /* if the disk is offline, report it as bad */ - bad_disk = B_TRUE; - error = 0; - } else - - if (error == 0 && - (((error = disk_get_media_type(disk, &mtype)) != 0) || - ((error = disk_get_drive_type(disk, &dtype)) != 0)) && - error == ENODEV) { - /* - * if any disk attribute access fails with ENODEV - * report it as bad - */ - bad_disk = B_TRUE; - error = 0; - } else { - - /* - * Determine whether disk is fixed by checking its - * drive type. If drive type is unknown, check media - * type. - */ - int isfixed = (dtype == DM_DT_FIXED || - (dtype == DM_DT_UNKNOWN && mtype == DM_MT_FIXED)); - - if (!isfixed) { - continue; /* ignore non-fixed disks */ - } - -#if defined(i386) - if (((error = disk_get_has_solaris_partition(disk, - &solpart)) != 0) || (solpart != B_TRUE)) { - - /* X86 drive has no solaris partition, report as bad */ - oprintf(OUTPUT_DEBUG, - gettext("%s has no solaris FDISK partition.\n")); - - bad_disk = B_TRUE; - } -#endif /* defined(i386) */ - - } - - if (bad_disk) { - /* remember bad disks and continue */ - if (dlist_contains(*bad, (void *)(uintptr_t)disk, - compare_descriptor_names) != B_TRUE) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - *bad = dlist_append(item, *bad, AT_TAIL); - } - } - continue; - } - - /* get disk name and multipath aliases */ - if ((error = disk_get_aliases(disk, &aliases)) == 0) { - dlist_t *iter; - boolean_t disk_name_set = B_FALSE; - - for (iter = aliases; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t ap = (uintptr_t)iter->obj; - char *alias; - - if ((error = get_name(ap, &alias)) == 0) { - /* save first alias as display name */ - if (disk_name_set != B_TRUE) { - /* make sure DID disk alias is fully qualified */ - - if (is_did_disk_name(alias) == B_TRUE) { - char *qual_name = - make_fully_qualified_did_device_name(alias); - - set_display_name(disk, qual_name); - oprintf(OUTPUT_DEBUG, - gettext("DID disk name: %s\n"), - qual_name); - } else { - set_display_name(disk, alias); - oprintf(OUTPUT_DEBUG, - gettext("disk name: %s\n"), - alias); - } - disk_name_set = B_TRUE; - - } else { - /* save others as aliases */ - set_alias(disk, alias); - oprintf(OUTPUT_DEBUG, - gettext(" alias: %s\n"), - alias); - } - } - } - - dlist_free_items(aliases, NULL); - } - - if (error == 0) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - *known = - dlist_insert_ordered(item, *known, - ASCENDING, compare_desc_display_names); - } - } - } - - if (ddp != NULL) { - free(ddp); - } - - return (error); -} - -/* - * FUNCTION: generate_known_slices(dlist_t *disks, - * dlist_t **known, dlist_t **bad) - * - * OUTPUT: disks - a pointer to a list of known disks - * known - a pointer to a dlist_t list to hold the known slices - * bad - a pointer to a dlist_t to hold the bad slices - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Examines input list of known disks and determines the slices - * attached to each. - * - * Some slices returned from libdiskmgt may not really exist, - * this is detected when trying to get more information about - * the slice -- ENODEV is returned. Any such slices will be - * added to the bad slice list. - */ -static int -generate_known_slices( - dlist_t *disks, - dlist_t **known, - dlist_t **bad) -{ - dlist_t *iter; - int error = 0; - - /* iterate list of disks and add their slices to the known list */ - for (iter = disks; (iter != NULL) && (error == 0); iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - dlist_t *slices = NULL; - dlist_t *iter1; - char *dname = NULL; - boolean_t disk_ctd_alias_derived = B_FALSE; - - if (((error = disk_get_slices(disk, &slices)) != 0) || - ((error = get_display_name(disk, &dname)) != 0)) { - continue; - } - - for (iter1 = slices; - (iter1 != NULL) && (error == 0); - iter1 = iter1->next) { - - dm_descriptor_t slice = (uintptr_t)iter1->obj; - uint32_t index = 0; - nvlist_t *attrs = NULL; - char *sname = NULL; - - if (((error = get_name(slice, &sname)) != 0) || - ((error = slice_get_index(slice, &index)) != 0) || - ((error = get_cached_attributes(slice, &attrs)) != 0)) { - - if (error == ENODEV) { - /* bad slice, remember it and continue */ - dlist_t *item = - dlist_new_item((void *)(uintptr_t)slice); - if (item == NULL) { - error = ENOMEM; - } else { - *bad = dlist_insert_ordered( - item, *bad, - ASCENDING, compare_descriptor_names); - error = 0; - } - } - continue; - } - - if ((error == 0) && (is_did_slice_name(sname) == B_TRUE) && - (disk_ctd_alias_derived == B_FALSE)) { - /* BEGIN CSTYLED */ - /* - * If the slice name is a DID name, get the local CTD - * name for slice, extract the disk name and add it as - * an alias for the disk. - * - * This is the only way to derive the CTD alias for the - * disk when DID is enabled. - * - * The disk_ctd_alias_derived flag ensure the disk's - * CTD alias is only set once. - * - * The slice's CTD aliases are then derived from the - * disk's CTD alias in the normal, non-DID name processing - * which happens below. - */ - /* END CSTYLED */ - char *local = NULL; - if ((error = nvlist_lookup_string(attrs, DM_LOCALNAME, - &local)) != 0) { - if (error == ENOENT) { - /* no local name -> no DID */ - error = 0; - } - } else { - char *localdisk = NULL; - char *diskonly = NULL; - if ((error = extract_diskname(local, - &localdisk)) == 0) { - if ((diskonly = strrchr(localdisk, '/')) != NULL) { - ++diskonly; - } else { - diskonly = localdisk; - } - oprintf(OUTPUT_DEBUG, - gettext(" set DID disk CTD alias: %s\n"), - diskonly); - error = set_alias(disk, diskonly); - free(localdisk); - disk_ctd_alias_derived = B_TRUE; - } - } - } - - /* derive slice display name from disk's display name */ - if (error == 0) { - if ((error = make_slicename_for_diskname_and_index( - dname, index, &sname)) == 0) { - error = set_display_name(slice, sname); - } - } - - /* set slice aliases using disk aliases */ - if (error == 0) { - dlist_t *aliases = NULL; - if ((error = get_aliases(disk, &aliases)) == 0) { - - dlist_t *iter2 = aliases; - for (; (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - - char *dalias = (char *)iter2->obj; - char *salias = NULL; - - if ((error = make_slicename_for_diskname_and_index( - dalias, index, &salias)) == 0) { - error = set_alias(slice, salias); - free(salias); - } - } - dlist_free_items(aliases, free); - } - } - - if (error == 0) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)slice); - if (item == NULL) { - error = ENOMEM; - } else { - *known = - dlist_insert_ordered( - item, *known, - ASCENDING, compare_desc_display_names); - } - } - } - - dlist_free_items(slices, NULL); - } - - return (error); -} - -/* - * FUNCTION: generate_known_hbas(dlist_t *disks, dlist_t **known) - * - * INPUT: diskset - a char * diskset name. - * - * OUTPUT: populates the list of known HBAs. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Examines known disk list and derives the list of known HBAs. - * - * Determines the CTD name for an HBA and saves it. - */ -static int -generate_known_hbas( - dlist_t *disks, - dlist_t **known) -{ - dlist_t *iter; - int error = 0; - - /* - * for each known disk follow its HBA connections and - * assemble the list of known HBAs. - */ - for (iter = disks; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - dlist_t *hbas = NULL; - dlist_t *iter2 = NULL; - dlist_t *iter3 = NULL; - dlist_t *aliases = NULL; - char *dname = NULL; - - ((error = get_display_name(disk, &dname)) != 0) || - (error = disk_get_aliases(disk, &aliases)) || - (error = disk_get_hbas(disk, &hbas)); - - if (error == 0) { - - if ((hbas == NULL) || (dlist_length(hbas) == 0)) { - - oprintf(OUTPUT_DEBUG, - gettext("Disk %s has no HBA/Controller?!\n"), - dname); - error = -1; - - dlist_free_items(hbas, NULL); - dlist_free_items(aliases, NULL); - - continue; - } - - for (iter2 = hbas, iter3 = aliases; - iter2 != NULL && iter3 != NULL; - iter2 = iter2->next, iter3 = iter3->next) { - - dm_descriptor_t hba = (uintptr_t)iter2->obj; - dm_descriptor_t alias = (uintptr_t)iter3->obj; - dlist_t *item = NULL; - - /* scan list of known HBAs and see if known */ - if (dlist_contains(*known, (void*)(uintptr_t)hba, - compare_descriptor_names) == B_TRUE) { - /* known HBA */ - continue; - } - - /* see if HBA supports MPXIO */ - if ((error == 0) && (_mpxio_enabled != B_TRUE)) { - hba_is_multiplex(hba, &_mpxio_enabled); - } - - /* generate a CTD name for HBA */ - error = generate_known_hba_name(hba, alias, disk); - if (error == 0) { - /* add to known HBA list */ - if ((item = dlist_new_item((void *)(uintptr_t)hba)) == - NULL) { - error = ENOMEM; - } else { - *known = - dlist_insert_ordered(item, *known, - ASCENDING, compare_desc_display_names); - } - } - } - } - - dlist_free_items(aliases, NULL); - dlist_free_items(hbas, NULL); - } - - return (error); -} - -/* - * FUNCTION: generate_known_hba_name(dm_descriptor_t hba, - * dm_descriptor_t alias, char *diskname) - * - * INPUT: hba - a dm_descriptor_t HBA handle. - * alias - a dm_descriptor_t disk alias handle. - * diskname - a char * disk name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Sets the CTD name for the input HBA. - * - * The CTD name for the HBA is generated from the input - * disk alias (ex: cXdXtXsX) or from the disk name if - * the input alias is a DID name (ex: dX). - */ -static int -generate_known_hba_name( - dm_descriptor_t hba, - dm_descriptor_t alias, - dm_descriptor_t disk) -{ - char *hbaname = NULL; - char *aliasname = NULL; - int error = 0; - - ((error = get_name(alias, &aliasname)) != 0) || - (error = extract_hbaname(aliasname, &hbaname)); - if (error != 0) { - free(hbaname); - return (error); - } - - /* see if the input alias is a DID name... */ - if (is_did_disk_name(aliasname) == B_TRUE) { - - /* look for a non-DID name in disk's aliases */ - dlist_t *aliases = NULL; - error = get_aliases(disk, &aliases); - - for (; (error == 0) && (aliases != NULL); - aliases = aliases->next) { - - aliasname = (char *)aliases->obj; - if (is_did_disk_name(aliasname) != B_TRUE) { - /* this is the "local" CTD name generated by */ - /* generate_known_disks() above */ - error = extract_hbaname(aliasname, &hbaname); - if ((error == 0) && (hbaname != NULL)) { - set_display_name(hba, hbaname); - break; - } - } - } - dlist_free_items(aliases, free); - - } else { - /* use whatever was derived from the alias name */ - set_display_name(hba, hbaname); - } - - return (error); -} - -/* - * FUNCTION: print_known_devices() - * - * PURPOSE: Print out the known devices. - * - * Iterates the lists of known slices, disks and HBAs - * and prints out their CTD and device names. - */ -static void -print_known_devices( - char *diskset) -{ - int i = 0; - struct { - char *msg; - dlist_t *list; - } devs[3]; - - devs[0].msg = gettext("HBA/Controllers"); - devs[0].list = _known_hbas; - devs[1].msg = gettext("disks"); - devs[1].list = _known_disks; - devs[2].msg = gettext("slices"); - devs[2].list = _known_slices; - - for (i = 0; i < 3; i++) { - - oprintf(OUTPUT_VERBOSE, - gettext("\n These %s are known:\n\n"), - devs[i].msg); - - print_device_list(devs[i].list); - } -} - -/* - * FUNCTION: get_usable_slices(dlist_t **list) - * - * OUTPUT: list - a dlist_t pointer to hold the returned list of - * devices. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Public accessors the the modules private lists of - * available devices. - * - * The functions are keyed by diskset name in the event - * objects in different disksets are loaded concurrently. - */ -int -get_usable_slices( - dlist_t **list) -{ - *list = _usable_slices; - - return (0); -} - -int -get_usable_disks( - dlist_t **list) -{ - *list = _usable_disks; - - return (0); -} - -int -get_usable_hbas( - dlist_t **list) -{ - *list = _usable_hbas; - - return (0); -} - -/* - * FUNCTION: generate_usable_disks_and_slices_in_local_set(dlist_t **classes, - * dlist_t **bad_disks, dlist_t **usable_disks, - * dlist_t **usable_slices) - * - * OUTPUT: used_classes - a pointer to a list of slice_class_t structs - * updated with known slices that have detected uses - * added to the correct class'e list of slices. - * bad_disks - a pointer to a list of bad/unusable disks updated - * with any bad disks that were detected - * useable_disks - a pointer to a list of usable disks - * useable_slices - a pointer to a list of usable slices - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Scans the disks in the local set to determine which are - * usable during layout processing. - * - * Determines which are usable by layout using usages detected - * by libdiskmgt. - */ -static int -generate_usable_disks_and_slices_in_local_set( - dlist_t **classes, - dlist_t **bad_slices, - dlist_t **usable_disks, - dlist_t **usable_slices) -{ - char *dsname = MD_LOCAL_NAME; - dlist_t *disks; - dlist_t *iter; - int error; - - /* Get disks in local set */ - error = get_disks_in_diskset(dsname, &disks); - if (error != 0) { - return (error); - } - - /* For each disk in this set... */ - for (iter = disks; iter != NULL && error == 0; iter = iter->next) { - dm_descriptor_t disk = (uintptr_t)iter->obj; - dlist_t *slices; - - /* Get slices on this disk */ - error = disk_get_slices(disk, &slices); - if (error == 0) { - dlist_t *iter2; - - /* - * Assume disk is available until a bad or unavailable - * slice is found - */ - boolean_t avail = B_TRUE; - boolean_t bad_disk = B_FALSE; - - /* For each slice on this disk... */ - for (iter2 = slices; - iter2 != NULL && error == 0 && - avail == B_TRUE && bad_disk == B_FALSE; - iter2 = iter2->next) { - - dm_descriptor_t slice = (uintptr_t)iter2->obj; - dlist_t *bad_slices_on_this_disk = NULL; - - /* Is this slice available? */ - error = check_slice_usage(dsname, slice, - disk, &avail, &bad_slices_on_this_disk, classes); - - /* Is the slice bad (inaccessible)? */ - if (error != 0 && bad_slices_on_this_disk != NULL) { - bad_disk = B_TRUE; - *bad_slices = dlist_append_list( - *bad_slices, bad_slices_on_this_disk); - } - } - - /* Is the disk available? */ - if (error == 0 && bad_disk == B_FALSE && avail == B_TRUE) { - error = dlist_append_object( - (void *)(uintptr_t)disk, usable_disks, AT_TAIL); - } - - dlist_free_items(slices, NULL); - } - } - - dlist_free_items(disks, NULL); - - if (error == 0) { - /* BEGIN CSTYLED */ - /* - * Now reslice usable disks in the local set to - * simulate the slices they'll have when they're added - * to the named disk set, and add these resulting - * virtual slices to the list of available slices. - */ - /* END CSTYLED */ - error = generate_virtual_slices(*usable_disks, usable_slices); - } - - return (error); -} - -/* - * FUNCTION: generate_virtual_slices(dlist_t *unused, dlist_t **usable) - * - * INPUT: slice_classes - a list of unused slice dm_descriptor_t handles. - * - * OUTPUT: usable - pointer to the list of usable slices, updated - * with any created virtual slices. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which creates virtual slices for each disk which - * could be added to a diskset if necessary... - * - * Search the input list of slice classes for the entry - * containing slices known to be available for use by layout. - * - * Iterate the list of unused slices and determine the set - * of unique disks. - * - * For each unique disk, create virtual slice descriptors to - * represent those that will exist if/when the disk is added - * to the diskset. - * - * Add theese virtual slices to the list of usable slices. - */ -static int -generate_virtual_slices( - dlist_t *avail_disks_local_set, - dlist_t **usable) -{ - dlist_t *iter = NULL; - int error = 0; - - /* generate virtual slices */ - error = create_virtual_slices(avail_disks_local_set); - if (error == 0) { - - get_virtual_slices(&iter); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - - dlist_t *item = dlist_new_item((void *) iter->obj); - if (item == NULL) { - error = ENOMEM; - } else { - *usable = - dlist_insert_ordered(item, *usable, - ASCENDING, compare_desc_display_names); - } - } - } - - return (error); -} - -/* - * FUNCTION: generate_usable_disks_and_slices_in_named_set(char *dsname, - * dlist_t **classes, dlist_t **bad_slices, - * dlist_t **usable_slices, dlist_t **usable_disks) - * - * INPUT: dsname - a char * diskset name. - * - * OUTPUT: classes - pointer to a list of slice_class_t structs, - * updated to include slices in the disk set with - * known uses. - * bad_slices - pointer to a list of bad/unusable slices, - * updated to include slices in the disk set that - * are inaccessible or no longer existent. - * usable_slices - pointer to a list of usable slices in the - * disk set. - * usable_disks - pointer to a list of usable disks in the - * disk set. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: 1. determine the disks in the named disk set - * 2. determine the used slices on the disks - * 3. determine the unused slices on the disks - * 4. look for unused space on the disks and collect it - * into an existing unused slice, or create a new - * virtual slice. - */ -static int -generate_usable_disks_and_slices_in_named_set( - char *dsname, - dlist_t **classes, - dlist_t **bad_slices, - dlist_t **usable_disks, - dlist_t **usable_slices) -{ - dlist_t *disks = NULL; - dlist_t *iter = NULL; - int error = 0; - - error = get_disks_in_diskset(dsname, &disks); - if (error != 0) { - return (error); - } - - /* For each disk... */ - for (iter = disks; - iter != NULL && error == 0; - iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - dlist_t *iter2; - dlist_t *slices = NULL; - dlist_t *bad_slices_on_this_disk = NULL; - dlist_t *used_slices_on_this_disk = NULL; - dlist_t *unused_slices_on_this_disk = NULL; - boolean_t bad_disk = B_FALSE; - - error = disk_get_slices(disk, &slices); - if (error != 0) { - break; - } - - /* Determine the used, unused, and bad slices on the disk */ - - /* For each slice... */ - for (iter2 = slices; - iter2 != NULL && error == 0 && bad_disk == B_FALSE; - iter2 = iter2->next) { - - dm_descriptor_t slice = (uintptr_t)iter2->obj; - - boolean_t rsvd = B_FALSE; - boolean_t avail = B_FALSE; - - /* Get slice usage */ - if (((error = is_reserved_slice(slice, &rsvd)) == 0) && - ((error = check_slice_usage(dsname, slice, disk, &avail, - &bad_slices_on_this_disk, classes)) == 0)) { - - /* Is the slice bad (inaccessible)? */ - if (bad_slices_on_this_disk != NULL) { - *bad_slices = dlist_append_list( - *bad_slices, bad_slices_on_this_disk); - /* - * Since one slice on this disk is bad, don't - * use any slices on this disk - */ - bad_disk = B_TRUE; - } else { - - dlist_t *item = - dlist_new_item((void *)(uintptr_t)slice); - if (item == NULL) { - error = ENOMEM; - } else { - /* Add slice to used/unused list as appropriate */ - if (avail == B_TRUE && rsvd == B_FALSE) { - unused_slices_on_this_disk = dlist_append( - item, unused_slices_on_this_disk, AT_TAIL); - } else { - used_slices_on_this_disk = - dlist_insert_ordered(item, - used_slices_on_this_disk, - ASCENDING, compare_start_blocks); - } - } - } - } - } - - /* Done iterating slices */ - - if (error == 0 && bad_disk == B_FALSE) { - /* For each unused slice... */ - for (iter2 = unused_slices_on_this_disk; - iter2 != NULL && error == 0; - iter2 = iter2->next) { - - dm_descriptor_t slice = (uintptr_t)iter2->obj; - error = update_slice_attributes(slice, 0, 0, 0); - - /* Only do this once */ - if (error == 0 && iter2 == unused_slices_on_this_disk) { - error = add_modified_disk(NULL, disk); - } - } - - if (error == 0) { - /* Create usable slices from the used/unused slice lists */ - error = create_usable_slices(disk, used_slices_on_this_disk, - unused_slices_on_this_disk, usable_slices); - if (error == 0) { - error = dlist_append_object((void *)(uintptr_t)disk, - usable_disks, AT_TAIL); - } - } - } - - dlist_free_items(slices, NULL); - dlist_free_items(used_slices_on_this_disk, NULL); - dlist_free_items(unused_slices_on_this_disk, NULL); - } - - return (error); -} - -/* - * FUNCTION: create_usable_slices(dm_descriptor_t disk, dlist_t *used, - * dlist_t *unused, dlist_t **usable); - * - * INPUT: disk - a dm_descriptor_t disk handle - * used - pointer to a list of pvt_t structs - * representing existing used slices - * on the input disk. - * unused - pointer to a list of pvt_t structs - * representing existing unused slices - * on the input disk. - * - * OUTPUT: usable - pointer to a list of pvts representing slices - * which can be used for new volume layouts. - * - * Slices in this list have any available space on the - * disk collected into the fewest, lowest indexed slices - * possible. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: helper for generate_usable_slices_and_disks_in_diskset() which - * turns any detected free space on the input disk into one or - * more slices. - */ -static int -create_usable_slices( - dm_descriptor_t disk, - dlist_t *used, - dlist_t *unused, - dlist_t **usable) -{ - dlist_t *iter; - int error = 0; - boolean_t first = B_TRUE; - dlist_t *next_unused = unused; - - char *dname = NULL; - uint64_t disk_firstblk = 0; - uint64_t disk_nblks = 0; - uint64_t disk_endblk = 0; - - oprintf(OUTPUT_DEBUG, - gettext("\n create_usable_slices for disk\n")); - - /* get necessary info about disk: */ - error = get_display_name(disk, &dname); - if (error != 0) { - return (error); - } - - /* disk start block is first usable block */ - error = disk_get_start_block(disk, &disk_firstblk); - if (error != 0) { - return (error); - } - - /* disk size determines last usable disk block */ - error = disk_get_size_in_blocks(disk, &disk_nblks); - if (error != 0) { - return (error); - } - - disk_endblk = disk_firstblk + disk_nblks - 1; - - /* search for gaps before, between and after used slices */ - for (iter = used; iter != NULL && error == 0; iter = iter->next) { - - dm_descriptor_t cur = (uintptr_t)iter->obj; - - uint64_t cur_stblk = 0; - uint64_t cur_nblks = 0; - uint64_t cur_endblk = 0; - uint32_t cur_index = 0; - - uint64_t new_stblk = 0; - uint64_t new_endblk = 0; - - char *sname = NULL; - (void) get_display_name(cur, &sname); - - if (((error = slice_get_index(cur, &cur_index)) != 0) || - ((error = slice_get_start_block(cur, &cur_stblk)) != 0) || - ((error = slice_get_size_in_blocks(cur, &cur_nblks)) != 0)) { - continue; - } - - cur_endblk = cur_stblk + cur_nblks - 1; - - oprintf(OUTPUT_DEBUG, - gettext(" used slice %d (%10llu to %10llu)\n"), - cur_index, cur_stblk, cur_endblk); - - if (first == B_TRUE) { - /* first slice: make sure it starts at disk_firstblk */ - first = B_FALSE; - if (cur_stblk != disk_firstblk) { - /* close gap at beginning of disk */ - new_stblk = disk_firstblk; - new_endblk = cur_stblk - 1; - - oprintf(OUTPUT_DEBUG, - gettext(" unused space before first " - "used slice\n")); - } - } - - if (iter->next != NULL) { - /* check for gap between slices */ - dm_descriptor_t next = (uintptr_t)iter->next->obj; - uint64_t next_stblk = 0; - uint32_t next_index = 0; - - if (((error = slice_get_start_block(next, &next_stblk)) == 0) && - ((error = slice_get_index(next, &next_index)) == 0)) { - if (cur_endblk != next_stblk - 1) { - /* close gap between slices */ - new_stblk = cur_endblk + 1; - new_endblk = next_stblk - 1; - - oprintf(OUTPUT_DEBUG, - gettext(" unused space between slices " - "%d and %d\n"), cur_index, next_index); - } - } - - } else { - /* last slice: make sure it includes last block on disk */ - if (cur_endblk != disk_endblk) { - /* close gap at end of disk */ - new_stblk = cur_endblk + 1; - new_endblk = disk_endblk; - - oprintf(OUTPUT_DEBUG, - gettext(" unused space after last slice " - "cur_endblk: %llu disk_endblk: %llu\n"), - cur_endblk, disk_endblk); - } - } - - if ((error == 0) && (new_endblk != 0)) { - error = add_new_usable(disk, new_stblk, - new_endblk - new_stblk + 1, &next_unused, usable); - } - } - - if (error != 0) { - dlist_free_items(*usable, free); - *usable = NULL; - } - - return (error); -} - -/* - * FUNCTION: add_new_usable(dm_descriptor_t disk, uint64_t stblk, - * uint64_t nblks, dlist_t **next_unused, - * dlist_t **usable); - * - * INPUT: disk - a dm_descriptor_t disk handle - * stblk - start block of the usable space - * nblks - number of usable blocks - * next_unused - pointer to the next unused slice - * - * OUTPUT: next_unused - updated pointer to the next unused slice - * usable - possibly updated pointer to a list of slices on - * the disk with usable space - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: helper for create_usable_slices() which turns free space - * on the input disk into a usable slice. - * - * If possible an existing unused slice will be recycled - * into a usable slice. If there are none, a new virtual - * slice will be created. - */ -static int -add_new_usable( - dm_descriptor_t disk, - uint64_t stblk, - uint64_t nblks, - dlist_t **next_unused, - dlist_t **usable) -{ - dm_descriptor_t new_usable = 0; - int error = 0; - - /* try to use an existing unused slice for the usable slice */ - if (*next_unused != NULL) { - new_usable = (uintptr_t)((*next_unused)->obj); - *next_unused = (*next_unused)->next; - - oprintf(OUTPUT_DEBUG, - gettext("\trecyling used slice into usable slice " - "start: %llu, end: %llu\n"), - stblk, stblk + nblks + 1); - } - - if (new_usable == NULL) { - /* no unused slices, try to make a new virtual slice */ - uint32_t index = UINT32_MAX; - error = disk_get_available_slice_index(disk, &index); - if ((error == 0) && (index != UINT32_MAX)) { - - char *dname = NULL; - error = get_display_name(disk, &dname); - if (error == 0) { - - char buf[MAXNAMELEN]; - (void) snprintf(buf, MAXNAMELEN-1, "%ss%d", dname, index); - error = add_virtual_slice(buf, index, 0, 0, disk); - if (error == 0) { - /* retrieve the virtual slice */ - error = slice_get_by_name(buf, &new_usable); - } - } - } - } - - if ((error == 0) && (new_usable != (dm_descriptor_t)0)) { - /* BEGIN CSTYLED */ - /* - * have an unused slice, update its attributes to reflect - * the usable space it represents - */ - /* END CSTYLED */ - uint64_t disk_blksz = 0; - error = disk_get_blocksize(disk, &disk_blksz); - if (error == 0) { - error = update_slice_attributes(new_usable, stblk, - nblks, nblks * disk_blksz); - if (error == 0) { - error = dlist_append_object( - (void *)(uintptr_t)new_usable, usable, AT_TAIL); - } - } - } - - return (error); -} - -/* - * FUNCTION: update_slice_attributes(dm_descriptor_t slice, uint64_t stblk, - * uint64_t nblks, uint64_t nbytes) - * - * INPUT: slice - a dm_descriptor_t slice handle - * stblk - start block of the usable space - * nblks - size of slice in blocks - * nbytes - size of slice in bytes - * - * SIDEEFFECT: adds a modification record for the slice. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: utility which updates several slice attributes in one call. - */ -static int -update_slice_attributes( - dm_descriptor_t slice, - uint64_t stblk, - uint64_t nblks, - uint64_t nbytes) -{ - char *sname = NULL; - uint32_t index = 0; - int error = 0; - - if ((error = get_display_name(slice, &sname)) == 0) { - if ((error = slice_get_index(slice, &index)) == 0) { - if ((error = slice_set_start_block(slice, stblk)) == 0) { - if ((error = slice_set_size_in_blocks(slice, nblks)) == 0) { - if (nblks == 0) { - error = add_slice_to_remove(sname, index); - } else { - error = assemble_modified_slice((dm_descriptor_t)0, - sname, index, stblk, nblks, nbytes, NULL); - } - } - } - } - } - - return (error); -} - -/* - * FUNCTION: generate_usable_hbas(dlist_t *slices, - * dlist_t **usable) - * - * INPUT: disks - a list of usable disks. - * - * OUTPUT: usable - a populated list of usable HBAs. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Examines usable disk list and derives the list of usable HBAs. - * - */ -static int -generate_usable_hbas( - dlist_t *disks, - dlist_t **usable) -{ - dlist_t *iter; - int error = 0; - - /* - * for each usable disk, follow its HBA connections and - * add them to the list of usable HBAs. - */ - for (iter = disks; (iter != NULL) && (error == 0); iter = iter->next) { - - dm_descriptor_t dp = NULL; - dlist_t *hbas = NULL; - dlist_t *iter2 = NULL; - - dp = (uintptr_t)iter->obj; - - error = disk_get_hbas(dp, &hbas); - if (error == 0) { - - for (iter2 = hbas; - (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - - dm_descriptor_t hba = (uintptr_t)iter2->obj; - dlist_t *item = NULL; - - /* scan list of usable HBAs and see if known */ - if (dlist_contains(*usable, (void*)(uintptr_t)hba, - compare_descriptor_names) == B_TRUE) { - /* known HBA, continue to next HBA/alias */ - continue; - } - - /* add this HBA to the usable list */ - if ((item = dlist_new_item((void *)(uintptr_t)hba)) == - NULL) { - error = ENOMEM; - } else { - *usable = - dlist_insert_ordered(item, *usable, - ASCENDING, compare_desc_display_names); - } - } - } - - dlist_free_items(hbas, NULL); - } - - return (error); -} - -/* - * FUNCTION: check_slice_usage(char *dsname, dm_descriptor_t slice, - * dm_descriptor_t disk, boolean_t *avail, - * dlist_t **bad, dlist_t **classes) - * - * INPUT: dsname - a char * diskset name. - * slice - a dm_descriptor_t handle for a known slices. - * disk - a dm_descriptor_t handle the slice's disk. - * - * OUTPUT: avail - a boolean_t to hold the slice's availability. - * bad - pointer to a list of bad/unusable slices, - * possibly updated if the input slice - * was determined to be inaccessible. - * classes - pointer to a list of slice_class_t structs, - * possibly updated to include the input slice - * if it has a known use. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Handles the details of - * determining usage and/or availability of a single slice. - * - * Queries the device library for the input slice's detectable - * usage status. - * - * If the slice has a detected usage, its name is added to - * the appropriate slice_class_t list in the input list of - * slice classes, this is only done if verbose output was - * requested. - */ -static int -check_slice_usage( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - boolean_t *avail, - dlist_t **bad, - dlist_t **classes) -{ - boolean_t online = B_FALSE; - boolean_t used = B_FALSE; - nvlist_t *stats = NULL; - char *name = NULL; - char *used_by = NULL; - char *use_detail = NULL; - int error = 0; - - *avail = B_FALSE; - - if (((error = get_display_name(slice, &name)) != 0) || - (error = disk_get_is_online(disk, &online))) { - return (error); - } - - /* - * if the disk is known to be offline, skip getting status - * for the slice since it will just fail and return ENODEV. - */ - if (online != B_TRUE) { - error = ENODEV; - } else { - stats = dm_get_stats(slice, DM_SLICE_STAT_USE, &error); - } - - if (error != 0) { - if (error == ENODEV) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)slice); - oprintf(OUTPUT_TERSE, - gettext("Warning: unable to get slice information " - "for %s, it will not be used.\n"), name); - - if (item == NULL) { - error = ENOMEM; - } else { - error = 0; - *bad = dlist_insert_ordered(item, *bad, ASCENDING, - compare_desc_display_names); - } - } else { - oprintf(OUTPUT_TERSE, - gettext("check_slice_usage: dm_get_stats for " - "%s failed %d\n"), - name, error); - } - - return (error); - } - - /* - * check if/how the slice is currently being used, - * device library provides this info in the nvpair_t list: - * - * stat_type is DM_SLICE_STAT_USE - * used_by: string (mount, svm, lu, vxvm, fs) - * used_name: string - * - */ - if (stats != NULL) { - error = get_string(stats, DM_USED_BY, &used_by); - if (error != 0) { - if (error == ENOENT) { - used_by = NULL; - error = 0; - } else { - oprintf(OUTPUT_TERSE, - gettext("check_slice_usage: dm_get_stats.%s for " - "%s failed %d\n"), - DM_USED_BY, name, error); - } - } - - if (error == 0) { - error = get_string(stats, DM_USED_NAME, &use_detail); - if (error != 0) { - if (error == ENOENT) { - use_detail = NULL; - error = 0; - } else { - oprintf(OUTPUT_TERSE, - gettext("check_slice_usage: " - "dm_get_stats.%s for " - "%s failed %d\n"), - DM_USED_NAME, name, error); - } - } - } - } - - if ((error == 0) && (used_by != NULL) && (used_by[0] != '\0')) { - - /* was detected usage SVM? */ - if (string_case_compare(used_by, DM_USE_SVM) == 0) { - - /* check use_detail, it is in the form diskset:name */ - if (strncmp("diskset:", use_detail, 8) == 0) { - - /* check disk set name */ - char *str = strrchr(use_detail, ':'); - if ((str != NULL) && - (string_case_compare(str+1, dsname) == 0)) { - - /* slice in the right diskset */ - error = check_svm_slice_usage( - dsname, slice, disk, &used, classes); - - } else { - - /* slice in other diskset */ - save_slice_classification( - dsname, slice, disk, used_by, use_detail, - classes); - used = B_TRUE; - } - - } else { - - /* slice is volume component */ - save_slice_classification( - dsname, slice, disk, used_by, use_detail, - classes); - used = B_TRUE; - } - - } else { - - /* save usage */ - save_slice_classification( - dsname, slice, disk, used_by, use_detail, - classes); - used = B_TRUE; - } - } - - nvlist_free(stats); - - if (error == 0) { - if (used == B_TRUE) { - *avail = B_FALSE; - } else { - *avail = B_TRUE; - } - } - - return (error); -} - -/* - * FUNCTION: check_svm_slice_usage(char *dsname, dm_descriptor_t slice, - * dm_descriptor_t disk, boolean_t *used, - * dlist_t **classes) - * - * INPUT: dsname - a char * diskset name. - * slice - a dm_descriptor_t handle for a known slices. - * disk - a dm_descriptor_t handle the slice's disk. - * - * OUTPUT: used - a boolean_t to hold the slice usage status. - * classes - pointer to a list of slice_class_t possibly updated - * with the input slice's SVM specific usage - * classification. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Handles the finer details of - * a single slice is being used in the context of SVM. - * - * Currently, one thing is checked: - * - * 1. determine if the slice is reserved for metadb replicas. - * The convention for disks in disksets is that a single slice - * (index 6 or 7) is set aside for metadb replicas. - * - * If this condition does not hold, the slice is considered - * available for use by layout and 'used' is set to B_FALSE. - */ -static int -check_svm_slice_usage( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - boolean_t *used, - dlist_t **classes) -{ - boolean_t is_replica = B_FALSE; - uint32_t index = 0; - char *diskname = NULL; - int error = 0; - - ((error = slice_get_index(slice, &index)) != 0) || - (error = get_display_name(disk, &diskname)) || - (error = is_reserved_replica_slice_index( - dsname, diskname, index, &is_replica)); - - if (error == 0) { - if (is_replica == B_TRUE) { - /* is replica slice -> used */ - save_slice_classification(dsname, slice, disk, DM_USE_SVM, - gettext("reserved for metadb replicas"), classes); - *used = B_TRUE; - } else { - *used = B_FALSE; - } - } - - return (error); -} - -/* - * FUNCTION: save_slice_classification(char *dsname, dm_descriptor_t slice, - * dm_descriptor_t disk, char *used_by, char *usage_detail, - * dlist_t **classes) - * - * INPUT: dsname - a char * disk set name - * slice - a dm_descriptor_t slice handle. - * disk - a dm_descriptor_t handle for the slice's disk. - * used_by - a char * usage classification. - * usage_detail - a char * usage description for the slice. - * - * OUTPUT: classes - a list of slice_class_t updated to hold a usage - * entry for the input slicexs. - * - * SIDEEFFECT: adds the input slice to the list of known, used slices. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Adds an entry to the - * appropriate slice_class_t list of slices. If there is - * not an appropriate slice_class_t entry in the input list - * of classes, one is added. - * - * As a performance optimization the slice usage classification - * information is only saved if verbose output was requested by - * the user. - */ -static int -save_slice_classification( - char *dsname, - dm_descriptor_t slice, - dm_descriptor_t disk, - char *usage, - char *usage_detail, - dlist_t **classes) -{ - int error = 0; - - error = add_used_slice(slice); - - if ((error == 0) && (get_max_verbosity() >= OUTPUT_VERBOSE)) { - - dlist_t *iter; - dlist_t *item; - slice_class_t *class = NULL; - - /* locate class struct matching 'usage' */ - for (iter = *classes; iter != NULL; iter = iter->next) { - class = (slice_class_t *)iter->obj; - if (string_case_compare(usage, class->usage) == 0) { - break; - } - } - - if (iter == NULL) { - /* add a new class to the list of classes */ - class = (slice_class_t *)calloc(1, sizeof (slice_class_t)); - if (class == NULL) { - error = ENOMEM; - } else { - class->usage = strdup(usage); - if (class->usage == NULL) { - free(class); - class = NULL; - error = ENOMEM; - } else { - item = dlist_new_item((void *)class); - if (item == NULL) { - free(class->usage); - free(class); - class = NULL; - error = ENOMEM; - } else { - *classes = dlist_append(item, *classes, AT_TAIL); - } - } - } - } - - if ((error == 0) && (class != NULL)) { - - char buf[BUFSIZ]; - char *dup = NULL; - char *slicename = NULL; - - (void) get_display_name(slice, &slicename); - (void) snprintf(buf, BUFSIZ-1, " %s: %s", - slicename, usage_detail); - if ((dup = strdup(buf)) == NULL) { - error = ENOMEM; - } else { - if ((item = dlist_new_item((void *)dup)) == NULL) { - free(dup); - error = ENOMEM; - } else { - class->sliceinfo = - dlist_insert_ordered( - item, class->sliceinfo, - ASCENDING, compare_strings); - } - } - } - } - - return (error); -} - -/* - * FUNCTION: print_usable_devices() - * - * PURPOSE: Print out the devices determined to be available for - * use by layout. - * - * Iterates the lists of usable slices, disks and HBAs - * and prints out their CTD and device names. - */ -static void -print_usable_devices() -{ - int i = 0; - - struct { - char *msg; - dlist_t *list; - } devs[3]; - - devs[0].msg = gettext("HBA/Controllers"); - devs[0].list = _usable_hbas; - devs[1].msg = gettext("disks"); - devs[1].list = _usable_disks; - devs[2].msg = gettext("slices"); - devs[2].list = _usable_slices; - - for (i = 0; i < 3; i++) { - - oprintf(OUTPUT_VERBOSE, - gettext("\n These %s are usable:\n\n"), - devs[i].msg); - - print_device_list(devs[i].list); - } -} - -/* - * FUNCTION: print_device_list(dlist_t *devices) - * - * INPUT: devices - a list of device descriptor handles - * - * PURPOSE: A helper for the print_XXX_devices() routines which iterates - * the input list and prints out each device name, CTD name and - * alias(es). - */ -static void -print_device_list( - dlist_t *devices) -{ - dlist_t *iter = NULL; - - for (iter = devices; iter != NULL; iter = iter->next) { - - dm_descriptor_t device = ((uintptr_t)iter->obj); - char *name = NULL; - char *ctd = NULL; - dlist_t *aliases = NULL; - - (void) get_display_name(device, &ctd); - (void) get_name(device, &name); - oprintf(OUTPUT_VERBOSE, - " %-25s %s\n", (ctd != NULL ? ctd : ""), name); - - (void) get_aliases(device, &aliases); - for (; aliases != NULL; aliases = aliases->next) { - oprintf(OUTPUT_VERBOSE, - gettext(" (alias: %s)\n"), - (char *)aliases->obj); - } - - dlist_free_items(aliases, free); - } -} - -/* - * FUNCTION: print_unusable_devices( - * dlist_t *bad_slices, dlist_t *bad_disks, - * dlist_t *used_classes) - * - * INPUT: used_classes - a list of slice_class_t structs - * - * PURPOSE: Print out the devices determined to be unavailable for - * use by layout. - * - * Iterates the input list of slice classifications and prints - * out a description of the class and the slices so classified. - * - * Also iterates the lists of bad slices and disks (those that - * libdiskmgt returned descriptors for but cannot be accessed) - * and notes them as unusable. - */ -static void -print_unusable_devices( - dlist_t *bad_slices, - dlist_t *bad_disks, - dlist_t *used_classes) -{ - dlist_t *iter = NULL; - dlist_t *slices = NULL; - char *preamble; - - struct { - char *msg; - dlist_t *list; - } devs[2]; - - /* report bad disks and slices */ - devs[0].msg = gettext("disks"); - devs[0].list = bad_disks; - devs[1].msg = gettext("slices"); - devs[1].list = bad_slices; - - if (bad_disks != NULL) { - oprintf(OUTPUT_VERBOSE, -#if defined(sparc) - gettext("\n These disks are not usable, they may " - "may be offline or cannot be accessed:\n\n")); -#elif defined(i386) - gettext("\n These disks are not usable, they may " - "may be offline,\n missing a Solaris FDISK " - "partition or cannot be accessed:\n\n")); -#endif - print_device_list(bad_disks); - } - - if (bad_slices != NULL) { - oprintf(OUTPUT_VERBOSE, gettext( - "\n These slices, and subsequently the disks on which they\n" - "reside, are not usable, they cannot be accessed:\n\n")); - print_device_list(bad_slices); - } - - /* report used slices and usages */ - preamble = gettext("\n These slices are not usable, %s:\n\n"); - for (iter = used_classes; iter != NULL; iter = iter->next) { - slice_class_t *class = (slice_class_t *)iter->obj; - - if (class->sliceinfo != NULL) { - - oprintf(OUTPUT_VERBOSE, preamble, - get_slice_usage_msg(class->usage)); - - slices = class->sliceinfo; - for (; slices != NULL; slices = slices->next) { - oprintf(OUTPUT_VERBOSE, " %s\n", (char *)slices->obj); - } - } - } - -} - -/* - * FUNCTION: char * get_slice_usage_msg(char *usage) - * - * INPUT: usage - char * string representing a slice usage classification - * - * OUTPUT: char * "friendly" usage message - * - * PURPOSE: the input usage string comes from libdiskmgt and is very terse. - * - * Convert it into a friendlier usage description suitable for user - * consumption. - */ -static char * -get_slice_usage_msg( - char *usage) -{ - char *str = NULL; - - if (string_case_compare(usage, DM_USE_MOUNT) == 0) { - str = gettext("they have mounted filesystems"); - } else if (string_case_compare(usage, DM_USE_FS) == 0) { - str = gettext("they appear to have unmounted filesystems"); - } else if (string_case_compare(usage, DM_USE_SVM) == 0) { - str = gettext("they are utilized by SVM"); - } else if (string_case_compare(usage, DM_USE_VXVM) == 0) { - str = gettext("they are utilized by VxVm"); - } else if (string_case_compare(usage, DM_USE_LU) == 0) { - str = gettext("they are utilized by LiveUpgrade"); - } else if (string_case_compare(usage, DM_USE_DUMP) == 0) { - str = gettext("they are reserved as dump devices"); - } else if (string_case_compare(usage, USE_DISKSET) == 0) { - str = gettext("they have disk set issues"); - } else { - /* libdiskmgt has detected a usage unknown to layout */ - str = usage; - } - - return (str); -} - -/* - * FUNCTION: set_alias(dm_descriptor_t desc, char *alias) - * - * INPUT: desc - a dm_descriptor_t handle. - * alias - a char * alias for the device represented - * by the descriptor. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Adds the specified alias to the known aliases for the - * device associated with the input descriptor. - */ -int -set_alias( - dm_descriptor_t desc, - char *alias) -{ - nvlist_t *attrs = NULL; - char **old_aliases = NULL; - char **new_aliases = NULL; - uint_t nelem = 0; - int error = 0; - int i = 0; - - if ((error = get_cached_attributes(desc, &attrs)) != 0) { - return (error); - } - - if ((error = get_string_array( - attrs, ATTR_DEVICE_ALIASES, &old_aliases, &nelem)) != 0) { - if (error != ENOENT) { - return (error); - } - /* no aliases yet */ - error = 0; - } - - /* add new alias */ - new_aliases = (char **)calloc(MAX_ALIASES, sizeof (char *)); - if (new_aliases != NULL) { - - for (i = 0; i < nelem && i < MAX_ALIASES; i++) { - char *dup = strdup(old_aliases[i]); - if (dup != NULL) { - new_aliases[i] = dup; - } else { - error = ENOMEM; - } - } - - if (error == 0) { - if (i == MAX_ALIASES) { - volume_set_error( - gettext("Maximum number of device aliases " - "(8) reached\n"), - MAX_ALIASES); - error = -1; - - } else { - new_aliases[i] = alias; - error = set_string_array(attrs, ATTR_DEVICE_ALIASES, - new_aliases, i + 1); - } - } - - free(new_aliases); - } - - if (error == 0) { - /* cache descriptor under this alias */ - error = add_cached_descriptor(alias, desc); - } - - return (error); -} - -/* - * FUNCTION: get_aliases(dm_descriptor_t desc, dlist_t **list) - * - * INPUT: desc - a dm_descriptor_t handle. - * - * OUTPUT: list - a dlist_t list pointing to the list of - * aliases associated with the device - * represented by the descriptor. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Retrieves aliases for the input descriptor and - * appends them to the input list. - * - * The list of returned items must be freed by calling - * dlist_free_items(list, free) - */ -int -get_aliases( - dm_descriptor_t desc, - dlist_t **list) -{ - nvlist_t *attrs = NULL; - char **aliases = NULL; - uint_t nelem = 0; - int error = 0; - int i; - - if ((error = get_cached_attributes(desc, &attrs)) != 0) { - return (error); - } - - if ((error = get_string_array( - attrs, ATTR_DEVICE_ALIASES, &aliases, &nelem)) != 0) { - if (error == ENOENT) { - /* no aliases */ - return (0); - } - } - - for (i = 0; i < nelem; i++) { - dlist_t *item; - char *dup; - - if ((dup = strdup(aliases[i])) == NULL) { - error = ENOMEM; - break; - } - - if ((item = dlist_new_item(dup)) == NULL) { - free(dup); - error = ENOMEM; - break; - } - - *list = dlist_append(item, *list, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: compare_start_blocks( - * void *obj1, void *obj2) - * - * INPUT: desc1 - opaque pointer to a dm_descriptor_t - * desc2 - opaque pointer to a dm_descriptor_t - * - * RETURNS: int - <0 - if desc1.stblk < desc2.stblk - * 0 - if desc1.stblk == desc2.stblk - * >0 - if desc1.stblk > desc.stblk - * - * PURPOSE: dlist_t helper which compares the start blocks of - * the two input dm_descriptor_t slice handles. - */ -static int -compare_start_blocks( - void *desc1, - void *desc2) -{ - uint64_t stblk1 = 0; - uint64_t stblk2 = 0; - - assert(desc1 != (dm_descriptor_t)0); - assert(desc2 != (dm_descriptor_t)0); - - (void) slice_get_start_block((uintptr_t)desc1, &stblk1); - (void) slice_get_start_block((uintptr_t)desc2, &stblk2); - - return (stblk1 - stblk2); -} - -/* - * FUNCTION: compare_desc_display_names( - * void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer to a dm_descriptor_t - * desc2 - opaque pointer to a dm_descriptor_t - * - * RETURNS: int - <0 - if desc1.name < desc2.name - * 0 - if desc1.name == desc2.name - * >0 - if desc1.name > desc.name - * - * PURPOSE: dlist_t helper which compares the CTD names of the - * two input dm_descriptor_t objects. - */ -static int -compare_desc_display_names( - void *desc1, - void *desc2) -{ - char *name1 = NULL; - char *name2 = NULL; - - assert(desc1 != (dm_descriptor_t)0); - assert(desc2 != (dm_descriptor_t)0); - - (void) get_display_name((uintptr_t)desc1, &name1); - (void) get_display_name((uintptr_t)desc2, &name2); - - return (string_case_compare(name1, name2)); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_discovery.h b/usr/src/cmd/lvm/metassist/layout/layout_discovery.h deleted file mode 100644 index edcc661f267b..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_discovery.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_DISCOVERY_H -#define _LAYOUT_DISCOVERY_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "libdiskmgt.h" - -/* - * scan physical devices and build lists of known devices. - */ -extern int discover_known_devices(); - -/* - * release lists of known devices. - */ -extern int release_known_devices(); - -/* - * scan known devices and build lists of usable devices. - */ -extern int discover_usable_devices(char *diskset); - -/* - * release lists of usable devices. - */ -extern int release_usable_devices(); - -/* - * functions to access lists of known devices for the system, - * constructed by load_physical_devices - */ -extern int get_known_slices(dlist_t **list); -extern int get_known_disks(dlist_t **list); -extern int get_known_hbas(dlist_t **list); - -/* - * functions to access lists of devices for the named diskset - * constructed by load_physical_devices - */ -extern int get_usable_slices(dlist_t **list); -extern int get_usable_disks(dlist_t **list); -extern int get_usable_hbas(dlist_t **list); - -/* - * predicate indicating whether MPXIO appears enabled for the system - */ -extern boolean_t is_mpxio_enabled(); - -/* - * functions that set/get a descriptor's multipath alias name(s). - */ -extern int get_aliases(dm_descriptor_t desc, dlist_t **aliases); -extern int set_alias(dm_descriptor_t desc, char *alias); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_DISCOVERY_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.c b/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.c deleted file mode 100644 index 4ced85510464..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#define _LAYOUT_DLIST_UTIL_C - -#include -#include - -#include -#include - -#include "volume_devconfig.h" -#include "volume_dlist.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_dlist_util.h" -#include "layout_request.h" - -#include "layout_slice.h" /* destroy_new_slice */ -#include "layout_svm_util.h" - -/* - * FUNCTION: compare_strings(void *str1, void *str2) - * - * INPUT: str1 - opaque pointer to a char * - * str2 - opaque pointer to a char * - * - * RETURNS: int - <0 - if str1 < str2 - * 0 - if str1 == str2 - * >0 - if str1 > str2 - * - * PURPOSE: dlist_t helper which compares the two input strings. - * - * Comparison is done with string_case_compare() - */ -int -compare_strings( - void *str1, - void *str2) -{ - assert(str1 != NULL); - assert(str2 != NULL); - - return (string_case_compare((char *)str1, (char *)str2)); -} - -/* - * FUNCTION: compare_devconfig_sizes(void *devconf1, void *devconf2) - * - * INPUT: devconf1 - opaque pointer - * devconf2 - opaque pointer - * - * RETURNS: int - <0 - if devconf1.size_in_blks < devconf2.size_in_blks - * 0 - if devconf1.size_in_blks == devconf2.size_in_blks - * >0 - if devconf1.size.in_blks > devconf2.size_in_blks - * - * PURPOSE: dlist_t helper which compares the sizes of two devconfig_t - * structs. - * - * Both input objects are assumed to be devconfig_t pointers. - */ -int -compare_devconfig_sizes( - void *devconf1, - void *devconf2) -{ - uint64_t size1 = 0; - uint64_t size2 = 0; - - assert(devconf1 != NULL); - assert(devconf2 != NULL); - - (void) devconfig_get_size_in_blocks((devconfig_t *)devconf1, &size1); - (void) devconfig_get_size_in_blocks((devconfig_t *)devconf2, &size2); - - return (size1 - size2); -} - -/* - * FUNCTION: compare_slice_sizes(void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer to a dm_descriptor_t slice handle - * desc2 - opaque pointer to a dm_descriptor_t slice handle - * - * RETURNS: int - <0 - if desc1.slicesize < desc2.slicesize - * 0 - if desc1.slicesize == desc2.slicesize - * >0 - if desc1.slicesize > desc2.slicesize - * - * PURPOSE: dlist_t helper which compares the sizes of two slices - * represented as dm_descriptor_t handles. - */ -int -compare_slice_sizes( - void *desc1, - void *desc2) -{ - uint64_t size1 = 0; - uint64_t size2 = 0; - - assert(desc1 != NULL); - assert(desc2 != NULL); - - (void) slice_get_size((uintptr_t)desc1, &size1); - (void) slice_get_size((uintptr_t)desc2, &size2); - - return (size1 - size2); -} - -/* - * FUNCTION: compare_devconfig_and_descriptor_names(void *devconf, - * void *desc) - * - * INPUT: devconf - opaque pointer to a devconfig_t - * desc - opaque pointer to a dm_descriptor_t - * - * RETURNS: int - <0 - if devconf name is "less than" descr name - * 0 - if devconf name is "equal to" descr name - * >0 - if devconf name is "greater than" desc name - * - * PURPOSE: dlist_t helper which compares the name of a devconfig_t - * struct to the name for a dm_descriptor_t. - * - * Note that the order of the arguments is important. - * This function is intended to be passed into the various - * dlist_* functions which take a comparison function. - */ -int -compare_devconfig_and_descriptor_names( - void *devconf, - void *desc) -{ - char *volname = NULL; - char *descname = NULL; - - assert(devconf != NULL); - assert(desc != NULL); - - (void) devconfig_get_name((devconfig_t *)devconf, &volname); - (void) get_display_name((uintptr_t)desc, &descname); - - return (string_case_compare(volname, descname)); -} - -/* - * FUNCTION: compare_string_to_devconfig_name(void *str, void *devconf) - * - * INPUT: str - opaque pointer to a char *str - * devconf - opaque pointer to a devconfig_t - * - * RETURNS: int - <0 - if devconf name is "less than" str - * 0 - if devconf name is "equal to" str - * >0 - if devconf name is "greater than" str - * - * PURPOSE: dlist_t helper which compares a string to the name of - * a devconfig_t struct. - */ -int -compare_string_to_devconfig_name( - void *str, - void *devconf) -{ - char *volname = NULL; - - assert(str != NULL); - assert(devconf != NULL); - - (void) devconfig_get_name((devconfig_t *)devconf, &volname); - if (volname == NULL) { - /* no memory for new string(s) */ - return (-1); - } - - return (string_case_compare(volname, (char *)str)); -} - -/* - * FUNCTION: free_devconfig_object(void *obj) - * - * INPUT: obj - an opaque pointer - * - * RETURNS: void - * - * PURPOSE: helper which decomposes a devconfig_t struct after a - * failed layout attempt. - * - * reclaims allocated space. - * releases reserved volume/HSP names - * undoes slicing - */ -void -free_devconfig_object( - void *obj) -{ - devconfig_t *dev = NULL; - char *name = NULL; - dlist_t *iter = NULL; - component_type_t type = TYPE_UNKNOWN; - - if (obj == NULL) { - return; - } - - dev = (devconfig_t *)obj; - - (void) devconfig_get_type(dev, &type); - (void) devconfig_get_name(dev, &name); - - oprintf(OUTPUT_DEBUG, - gettext(" -->decomposing %s\n"), name); - - switch (type) { - case TYPE_MIRROR: - case TYPE_CONCAT: - case TYPE_RAID5: - case TYPE_HSP: - case TYPE_STRIPE: - - /* release name */ - if (devconfig_isA(dev, TYPE_HSP)) { - release_hsp_name(name); - } else { - release_volume_name(name); - } - - /* decompose volume's components */ - iter = devconfig_get_components(dev); - dlist_free_items(iter, free_devconfig_object); - - (void) devconfig_set_components(dev, NULL); - - break; - - case TYPE_SLICE: - - (void) destroy_new_slice(dev); - - break; - - default: - break; - - } - - free_devconfig(dev); -} - -/* - * FUNCTION: compare_device_names( - * void *str1, void *str2) - * - * INPUT: str1 - opaque pointer - * str2 - opaque pointer - * - * RETURNS: int - <0 - if str1 < str2 - * 0 - if str1 == str2 - * >0 - if str1 > str2 - * - * PURPOSE: dlist_t helper which compares two device name strings. - * - * Both names are assumed to be in CTD form. - * - * Either name may be fully qualified by an absolute - * path. If only one name is fully qualified, the - * leading path with be stripped off prior to the - * comparison. - * - * Uses string_case_compare() to compare the names. - */ -int -compare_device_names( - void *str1, - void *str2) -{ - char *name1 = (char *)str1; - char *name2 = (char *)str2; - - int val = 0; - - assert(str1 != NULL); - assert(str2 != NULL); - - /* if one doesn't start with '/', just compare device names */ - if (*name1 != '/' || *name2 != '/') { - - char *short1 = strrchr(name1, '/'); - char *short2 = strrchr(name2, '/'); - - if (short1 == NULL) { - short1 = name1; - } else { - ++short1; - } - - if (short2 == NULL) { - short2 = name2; - } else { - ++short2; - } - - val = string_case_compare(short2, short1); - - } else { - - /* if they both start with '/', assume they're full paths */ - val = string_case_compare(name2, name1); - } - - return (val); -} - -/* - * FUNCTION: compare_descriptors( - * void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer - * desc2 - opaque pointer - * - * RETURNS: int - <0 - if desc1 < desc2 - * 0 - if desc1 == desc2 - * >0 - if desc1 > desc2 - * - * PURPOSE: dlist_t helper which compares two dm_descriptor_t handles. - */ -int -compare_descriptors( - void *desc1, - void *desc2) -{ - assert(desc1 != NULL); - assert(desc2 != NULL); - - return ((uintptr_t)desc1 - (uintptr_t)desc2); -} - -/* - * FUNCTION: compare_descriptor_names( - * void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer - * desc2 - opaque pointer - * - * RETURNS: int - <0 - if desc1.name < desc2.name - * 0 - if desc1.name == desc2.name - * >0 - if desc1.name > desc2.name - * - * PURPOSE: dlist_t helper which compares the names associated - * with the input dm_descriptor_t handles. - * - * Retrieves the names associated with both descriptors - * and compares them using string_case_compare. - */ -int -compare_descriptor_names( - void *desc1, - void *desc2) -{ - char *name1 = NULL; - char *name2 = NULL; - - assert(desc1 != NULL); - assert(desc2 != NULL); - - (void) get_name((uintptr_t)desc1, &name1); - (void) get_name((uintptr_t)desc2, &name2); - - return (string_case_compare(name1, name2)); -} - -/* - * FUNCTION: compare_slices_on_same_hba( - * void *slice1, void *slice2) - * - * INPUT: slice1 - opaque pointer - * slice2 - opaque pointer - * - * RETURNS: int - 0 - if slice1 is on the same hba as slice2 - * !0 - otherwise - * - * PURPOSE: dlist_t helper which checks whether slice1 is on the - * same hba as slice2 - */ -int -compare_slices_on_same_hba( - void *slice1, - void *slice2) -{ - char *name1, *name2; - - /* Retrieve the names of the slices */ - if (devconfig_get_name((devconfig_t *)slice1, &name1) == 0 && - devconfig_get_name((devconfig_t *)slice2, &name2) == 0) { - - dm_descriptor_t desc1, desc2; - - /* Retrieve the disk descriptors for the slices */ - if (get_disk_for_named_slice(name1, &desc1) == 0 && - get_disk_for_named_slice(name2, &desc2) == 0) { - - dlist_t *hbas1 = NULL; - dlist_t *hbas2 = NULL; - - assert(desc1 != (dm_descriptor_t)0); - assert(desc2 != (dm_descriptor_t)0); - - /* Retrieve list of HBA descriptors for the slices */ - if (disk_get_hbas(desc1, &hbas1) == 0 && - disk_get_hbas(desc2, &hbas2) == 0) { - - dlist_t *itr1; - - for (itr1 = hbas1; itr1 != NULL; itr1 = itr1->next) { - dm_descriptor_t hba1 = (uintptr_t)itr1->obj; - dlist_t *itr2; - - for (itr2 = hbas2; itr2 != NULL; itr2 = itr2->next) { - dm_descriptor_t hba2 = (uintptr_t)itr2->obj; - - if (hba1 == hba2) { - return (0); - } - } - } - } - } - } - - return (1); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.h b/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.h deleted file mode 100644 index ff65497c992b..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_dlist_util.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_DLIST_UTIL_H -#define _LAYOUT_DLIST_UTIL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * A collection of utility functions for manipulating and traversing - * dlist_t linked lists. - */ - -/* - * FUNCTION: compare_strings(void *str1, void *str2) - * - * INPUT: str1 - opaque pointer to a char * - * str2 - opaque pointer to a char * - * - * RETURNS: int - <0 - if str1 < str2 - * 0 - if str1 == str2 - * >0 - if str1 > str2 - * - * PURPOSE: dlist_t helper which compares the two input strings. - * - * Comparison is done with string_compare() - */ -extern int compare_strings(void *str1, void *str2); - -/* - * FUNCTION: compare_device_names( - * void *str1, void *str2) - * - * INPUT: str1 - opaque pointer - * str2 - opaque pointer - * - * RETURNS: int - <0 - if str1 < str2 - * 0 - if str1 == str2 - * >0 - if str1 > str2 - * - * PURPOSE: dlist_t helper which compares two device name strings. - * - * Both names are assumed to be in CTD form. - * - * Either name may be fully qualified by an absolute - * path. If only one name is fully qualified, the - * leading path with be stripped off prior to the - * comparison. - * - * Uses string_compare() to compare the names. - */ -extern int compare_device_names(void *str1, void *str2); - -/* - * FUNCTION: compare_devconfig_sizes(void *devconf1, void *devconf2) - * - * INPUT: devconf1 - opaque pointer - * devconf2 - opaque pointer - * - * RETURNS: int - <0 - if devconf1.size_in_blks < devconf2.size_in_blks - * 0 - if devconf1.size_in_blks == devconf2.size_in_blks - * >0 - if devconf1.size_in_blks > devconf2.size_in_blks - * - * PURPOSE: dlist_t helper which compares the sizes of two devconfig_t - * structs. - * - * Both input objects are assumed to be devconfig_t pointers. - */ -extern int compare_devconfig_sizes(void *devconf1, void *devconf2); - -/* - * FUNCTION: compare_slice_sizes(void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer to a dm_descriptor_t slice handle - * desc2 - opaque pointer to a dm_descriptor_t slice handle - * - * RETURNS: int - <0 - if desc1.slicesize < desc2.slicesize - * 0 - if desc1.slicesize == desc2.slicesize - * >0 - if desc1.slicesize > desc2.slicesize - * - * PURPOSE: dlist_t helper which compares the sizes of two slices - * represented as dm_descriptor_t handles. - */ -extern int compare_slice_sizes(void *obj1, void *obj2); - -/* - * FUNCTION: compare_descriptors( - * void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer - * desc2 - opaque pointer - * - * RETURNS: int - <0 - if desc1 < desc2 - * 0 - if desc1 == desc2 - * >0 - if desc1 > desc2 - * - * PURPOSE: dlist_t helper which compares two dm_descriptor_t handles. - */ -extern int compare_descriptors(void *desc1, void *desc2); - -/* - * FUNCTION: compare_descriptor_names( - * void *desc1, void *desc2) - * - * INPUT: desc1 - opaque pointer - * desc2 - opaque pointer - * - * RETURNS: int - <0 - if desc1.name < desc2.name - * 0 - if desc1.name == desc2.name - * >0 - if desc1.name > desc2.name - * - * PURPOSE: dlist_t helper which compares the names associated - * with the input dm_descriptor_t handles. - * - * Retrieves the names associated with both descriptors - * and compares them using string_compare. - */ -extern int compare_descriptor_names(void *desc1, void *desc2); - -/* - * FUNCTION: compare_devconfig_and_descriptor_names(void *devconf, - * void *desc) - * - * INPUT: devconf - opaque pointer to a devconfig_t - * desc - opaque pointer to a dm_descriptor_t - * - * RETURNS: int - <0 - if devconf name is "less than" descr name - * 0 - if devconf name is "equal to" descr name - * >0 - if devconf name is "greater than" desc name - * - * PURPOSE: dlist_t helper which compares the name of a devconfig_t - * struct to the name for a dm_descriptor_t. - * - * Note that the order of the arguments is important. - * This function is intended to be passed into the various - * dlist_* functions which take a comparison function. - */ -extern int compare_devconfig_and_descriptor_names(void *devconf, void *desc); - -/* - * FUNCTION: compare_string_to_devconfig_name(void *str, void *devconf) - * INPUT: str - opaque pointer to a char *str - * devconf - opaque pointer to a devconfig_t - * - * RETURNS: int - <0 - if devconf name is "less than" str - * 0 - if devconf name is "equal to" str - * >0 - if devconf name is "greater than" str - * - * PURPOSE: dlist_t helper which compares a string to the name of - * a devconfig_t struct. - */ -extern int compare_string_to_devconfig_name(void *str, void *devconf); - -/* - * FUNCTION: compare_slices_on_same_hba( - * void *slice1, void *slice2) - * - * INPUT: slice1 - opaque pointer - * slice2 - opaque pointer - * - * RETURNS: int - 0 - if slice1 is on the same hba as slice2 - * !0 - otherwise - * - * PURPOSE: dlist_t helper which checks whether slice1 is on the - * same hba as slice2 - */ -extern int compare_slices_on_same_hba(void *slice1, void *slice2); - -/* - * FUNCTION: free_devconfig_object(void *obj) - * - * INPUT: obj - an opaque pointer - * - * RETURNS: void - * - * PURPOSE: helper which decomposes a devconfig_t struct after a - * failed layout attempt. - * - * reclaims allocated space. - * releases reserved volume/HSP names - * undoes slicing - */ -extern void free_devconfig_object(void *obj); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_DLIST_UTIL_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_hsp.c b/usr/src/cmd/lvm/metassist/layout/layout_hsp.c deleted file mode 100644 index 6e52f09d18c1..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_hsp.c +++ /dev/null @@ -1,965 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#include - -#include "volume_error.h" -#include "volume_dlist.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_messages.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_svm_util.h" - -#define _LAYOUT_HSP_C - -static int layout_explicit_hsp( - devconfig_t *hsprequest, - dlist_t *devices, - devconfig_t **hsp); - -static int layout_default_hsp( - devconfig_t *request, - dlist_t *devices, - devconfig_t **hsp); - -static int populate_hsp( - devconfig_t *request, - devconfig_t *hsp, - dlist_t *devices); - -static int assemble_hsp( - devconfig_t *hsp, - dlist_t *newspares, - dlist_t *devices); - -static int get_uniquely_sized_slices( - dlist_t *devices, - dlist_t **unique); - -static int remove_undersized_slices( - dlist_t *unique, - dlist_t **avail); - -static int find_spare_for_component( - devconfig_t *component, - dlist_t *all_spares, - dlist_t *hbas, - dlist_t *disks, - boolean_t *found); - -static int choose_spare_for_component( - devconfig_t *comp, - dlist_t **all_spares, - dlist_t **new_spares, - dlist_t **avail, - dlist_t *used_hbas, - dlist_t *used_disks, - uint16_t npaths); - -/* - * FUNCTION: layout_hsp(devconfig_t *request, devconfig_t hsprequest, - * dlist_t *devices, dlist_t **results) - * - * INPUT: request - pointer to the toplevel request devconfig_t - * hsp - pointer to the optional HSP request devconfig_t - * devices - pointer to a list of devices to be served by the HSP - * - * OUTPUT: results - pointer to a list result devconfig_t, if the HSP - * to service the input list of devices needs to be - * created or modified, it will be appended to the list. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main layout driver for HSP, attempts to build/populate a - * single HSP to service the list of devices. - * - * If the input hsprequest is NULL, use the default HSP scheme: - * a. use the first HSP in the diskset - * b. create an HSP if the diskset has none - * - * If the hsprequest is not NULL: - * a. if the request names an HSP and it already exists, use it - * b. if the request names an HSP and it does not exist, create it - * c. if the request specifies components, use them - */ -int -layout_hsp( - devconfig_t *request, - devconfig_t *hsprequest, - dlist_t *devices, - dlist_t **results) -{ - int error = 0; - devconfig_t *hsp = NULL; - - oprintf(OUTPUT_TERSE, - gettext(" ->Layout a %s\n"), - devconfig_type_to_str(TYPE_HSP)); - - if (hsprequest == NULL) { - error = layout_default_hsp(request, devices, &hsp); - } else { - error = layout_explicit_hsp(hsprequest, devices, &hsp); - } - - if (error != 0) { - print_debug_failure_msg(devconfig_type_to_str(TYPE_HSP), - get_error_string(error)); - } else if (hsp != NULL) { - - if (devconfig_get_components(hsp) == NULL) { - /* HSP is usable as it is */ - free_devconfig(hsp); - hsp = NULL; - } else { - dlist_t *item = NULL; - if ((item = dlist_new_item(hsp)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - print_layout_success_msg(); - } - } - } - - return (error); -} - -/* - * FUNCTION: layout_default_hsp(devconfig_t *request, - * dlist_t *devices, devconfig_t **hsp) - * - * INPUT: request - pointer to the toplevel request devconfig_t - * devices - pointer to a list of devices to be served by the HSP - * - * OUTPUT: hsp - pointer to a devconfig_t to hold the resulting HSP - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout driver for default HSP construction. - * - * a. use the first HSP in the diskset - * b. create an HSP if the diskset has none - * c. add spares to the HSP to service the list of input devices. - */ -static int -layout_default_hsp( - devconfig_t *request, - dlist_t *devices, - devconfig_t **hsp) -{ - char *dsname = get_request_diskset(); - char *hspname = NULL; - boolean_t free_hspname = B_FALSE; - devconfig_t *default_hsp = NULL; - int error = 0; - - oprintf(OUTPUT_TERSE, - gettext(" -->Using default HSP scheme...\n")); - - if ((error = get_default_hsp_name(request, &hspname)) != 0) { - volume_set_error( - gettext("error getting HSP name from defaults\n")); - return (error); - } - - if (hspname != NULL) { - if ((error = hsp_get_by_name(dsname, hspname, &default_hsp)) != 0) { - volume_set_error( - gettext("error getting default HSP by name\n")); - return (error); - } - } else { - /* no default HSP name, get diskset's default HSP */ - if ((error = hsp_get_default_for_diskset(dsname, - &default_hsp)) != 0) { - volume_set_error( - gettext("error getting default HSP\n")); - return (error); - } - - if (default_hsp == NULL) { - /* no default HSP name, no default HSP, make one */ - if ((error = get_next_hsp_name(&hspname)) != 0) { - volume_set_error( - gettext("error making default HSP name\n")); - return (error); - } - free_hspname = B_TRUE; - } - } - - if (default_hsp != NULL) { - - /* Found existing default HSP, copy it */ - dlist_t *spares = devconfig_get_components(default_hsp); - - ((error = devconfig_get_name(default_hsp, &hspname)) != 0) || - (error = new_devconfig(hsp, TYPE_HSP)) || - (error = devconfig_set_name(*hsp, hspname)); - - if (error == 0) { - devconfig_set_components(*hsp, spares); - devconfig_set_components(default_hsp, NULL); - - oprintf(OUTPUT_TERSE, - gettext(" --->Using %s from disk set %s...\n"), - hspname, dsname); - } else { - free_devconfig(*hsp); - *hsp = NULL; - } - - } else { - - /* no existing default HSP, make it */ - ((error = new_devconfig(hsp, TYPE_HSP)) != 0) || - (error = devconfig_set_name(*hsp, hspname)); - if (error == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" --->Created %s for disk set %s...\n "), - hspname, dsname); - } else { - free_devconfig(*hsp); - *hsp = NULL; - } - - if (free_hspname == B_TRUE) { - free(hspname); - } - } - - if (error == 0) { - error = populate_hsp(request, *hsp, devices); - } - - return (error); -} - -/* - * FUNCTION: layout_explicit_hsp(devconfig_t *hsprequest, - * dlist_t *devices, devconfig_t **hsp) - * - * INPUT: hsprequest - pointer to the explicit HSP request devconfig_t - * devices - pointer to a list of devices to be served by the HSP - * - * OUTPUT: hsp - pointer to a HSP devconfig_t to hold resulting HSP - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout driver for an explicit HSP request. - * - * a. if the request names an HSP and it already exists, use it - * b. if the request names an HSP and it does not exist, create it - * c. if the request specifies components, use them - * otherwise, add new spares to handle the input list - * of devices. - */ -static int -layout_explicit_hsp( - devconfig_t *hsprequest, - dlist_t *devices, - devconfig_t **hsp) -{ - char *dsname = get_request_diskset(); - char *hspname = NULL; - dlist_t *rspares = NULL; - int error = 0; - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Explicit HSP request...\n")); - - (void) devconfig_get_name(hsprequest, &hspname); - if (hspname != NULL) { - - (void) hsp_get_by_name(dsname, hspname, hsp); - if (*hsp != NULL) { - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Using %s...\n"), - hspname); - } else { - - /* named HSP doesn't exist, create it */ - ((error = new_devconfig(hsp, TYPE_HSP)) != 0) || - (error = devconfig_set_name(*hsp, hspname)); - if (error == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" --->%s does not exist, " - "created...\n"), hspname); - } else { - free_devconfig(*hsp); - *hsp = NULL; - } - free(hspname); - } - } - - if (error == 0) { - - /* does the hsprequest specify spares? */ - rspares = devconfig_get_components(hsprequest); - if (rspares != NULL) { - - /* put requested spares into HSP */ - dlist_t *list = NULL; - dlist_t *iter = NULL; - - for (iter = rspares; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dlist_t *item = NULL; - if ((dlist_new_item(iter->obj)) == NULL) { - error = ENOMEM; - } else { - list = dlist_append(item, list, AT_TAIL); - } - } - - if (error == 0) { - error = assemble_hsp(*hsp, rspares, devices); - } - - } else { - - /* select new spares */ - error = populate_hsp(hsprequest, *hsp, devices); - } - } - - return (error); -} - -/* - * FUNCTION: populate_hsp(devconfig_t *request, devconfig_t *hsp, - * dlist_t *devices) - * - * INPUT: request - pointer to a request devconfig_t - * hsp - pointer to a HSP devconfig_t - * devices - pointer to a list of devices to be served by the HSP - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Processes the input HSP request and add spares sufficient - * to service the input list of devices. - * - * Determine the available HBAs, disks, and slices. - * Sort thru the input list of devices and determine - * the unique component sizes which need to be spared. - * Filter the available slices and remove those that are - * too small to serve as spares. - * - * Iterate each device and its components and see if the - * HSP currently has a sufficient spare, if not, try - * to select one from the available slices. - * - * If a spare cannot be found for any device component, - * the HSP layout process stops. - * - * If spares are found for all device components, add - * any required new ones to the HSP. - */ -static int -populate_hsp( - devconfig_t *request, - devconfig_t *hsp, - dlist_t *devices) -{ - int error = 0; - uint16_t npaths = 0; - - dlist_t *usable_hbas = NULL; - dlist_t *sel_hbas = NULL; - dlist_t *disks = NULL; - dlist_t *iter = NULL; - - dlist_t *avail = NULL; /* available slices */ - dlist_t *slices = NULL; /* avail slices of sufficient size */ - dlist_t *unique = NULL; /* volume slices that need spares */ - dlist_t *curspares = NULL; /* current spares in the HSP */ - dlist_t *newspares = NULL; /* slices to add to HSP */ - dlist_t *allspares = NULL; /* current and new spares */ - - ((error = get_usable_hbas(&usable_hbas)) != 0) || - (error = select_hbas_with_n_disks(request, usable_hbas, 1, &sel_hbas, - &disks)) || - (error = disks_get_avail_slices(request, disks, &avail)) || - (error = get_volume_npaths(request, &npaths)); - if (error != 0) { - dlist_free_items(sel_hbas, NULL); - dlist_free_items(disks, NULL); - dlist_free_items(avail, NULL); - return (error); - } - - if (disks == NULL || dlist_length(disks) == 0) { - /* all disks have been consumed by the devices */ - volume_set_error( - gettext(" no available disks to populate HSP\n")); - dlist_free_items(sel_hbas, NULL); - dlist_free_items(avail, NULL); - return (-1); - } - - if (avail == NULL || dlist_length(avail) == 0) { - /* all slices have been consumed by the devices */ - volume_set_error( - gettext(" no available slices to populate HSP\n")); - dlist_free_items(sel_hbas, NULL); - dlist_free_items(disks, NULL); - return (-1); - } - - dlist_free_items(sel_hbas, NULL); - dlist_free_items(disks, NULL); - - /* build list of slices needing to be spared */ - ((error = get_uniquely_sized_slices(devices, &unique)) != 0) || - - /* and list of slices of sufficient size to spare for them */ - (error = remove_undersized_slices(unique, &avail)); - - if (error != 0) { - dlist_free_items(avail, NULL); - dlist_free_items(unique, NULL); - dlist_free_items(slices, NULL); - return (error); - } - - /* get spares currently in the HSP */ - curspares = devconfig_get_components(hsp); - - /* clone current spares list */ - for (iter = curspares; - (iter != NULL) && (error == 0); - iter = iter->next) { - dlist_t *item = dlist_new_item(iter->obj); - if (item == NULL) { - error = ENOMEM; - } else { - allspares = dlist_append(item, allspares, AT_TAIL); - } - } - - if (error != 0) { - dlist_free_items(avail, NULL); - dlist_free_items(unique, NULL); - dlist_free_items(slices, NULL); - dlist_free_items(allspares, NULL); - return (error); - } - - /* - * examine device component slices and see if the HSP already - * has a suitable spare. If not, select the best available - * of the same (or larger) size - */ - for (iter = devices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *device = (devconfig_t *)iter->obj; - dlist_t *components = devconfig_get_components(device); - dlist_t *hbas = NULL; - dlist_t *disks = NULL; - dlist_t *iter1; - - error = get_hbas_and_disks_used_by_volume(device, &hbas, &disks); - for (iter1 = components; (iter1 != NULL) && (error == 0); - iter1 = iter1->next) { - - devconfig_t *comp = (devconfig_t *)iter1->obj; - boolean_t found = B_FALSE; - - if ((error = find_spare_for_component( - comp, allspares, hbas, disks, &found)) == 0) { - if (found != B_TRUE) { - error = choose_spare_for_component( - comp, &allspares, &newspares, - &avail, hbas, disks, npaths); - } - } - } - dlist_free_items(disks, NULL); - dlist_free_items(hbas, NULL); - } - - if (error == 0) { - /* existing spares are no longer needed */ - dlist_free_items(curspares, free_devconfig_object); - curspares = NULL; - - error = assemble_hsp(hsp, newspares, devices); - } else { - dlist_free_items(newspares, free_devconfig_object); - newspares = NULL; - } - - dlist_free_items(avail, NULL); - dlist_free_items(slices, NULL); - dlist_free_items(unique, NULL); - dlist_free_items(allspares, NULL); - - return (error); -} - -/* - * FUNCTION: assemble_hsp(devconfig_t *hsp, dlist_t *newspares, - * dlist_t *devices) - * - * INPUT: request - pointer to a HSP devconfig_t - * newspare - pointer to a list of new spares for the HSP - * devices - pointer to a list of devices to be served by the HSP - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Final assembly of an HSP. Attach new spare components - * and associate the HSP with each device in the input list. - */ -static int -assemble_hsp( - devconfig_t *hsp, - dlist_t *newspares, - dlist_t *devices) -{ - dlist_t *iter; - char *hspname = NULL; - int error = 0; - - /* add new spares to HSP */ - (void) devconfig_set_components(hsp, newspares); - (void) devconfig_get_name(hsp, &hspname); - - /* associate HSP with each of the devices */ - for (iter = devices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = iter->obj; - devconfig_t *hspcomp = NULL; - dlist_t *item = NULL; - char *devname = NULL; - - ((error = devconfig_get_name(dev, &devname)) != 0) || - (error = new_devconfig(&hspcomp, TYPE_HSP)) || - (error = devconfig_set_name(hspcomp, hspname)); - - if (error != 0) { - - free_devconfig(hspcomp); - - } else if ((item = dlist_new_item(hspcomp)) == NULL) { - - free_devconfig(hspcomp); - error = ENOMEM; - - } else { - - dlist_t *comps = devconfig_get_components(dev); - comps = dlist_append(comps, item, AT_TAIL); - (void) devconfig_set_components(dev, comps); - - oprintf(OUTPUT_VERBOSE, - gettext(" --->volume %s will use HSP %s\n"), - devname, hspname); - } - } - - return (error); -} - -/* - * FUNCTION: get_uniquely_sized_slices(dlist_t *devices, - * dlist_t **unique) - * - * INPUT: devices - pointer to a list of devconfig_t devices - * - * OUTPUT: unique - pointer to a list of uniquely size slices - * from the input list of devices. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Examine each device's slice components and build a list - * of uniquely sized slices. - */ -static int -get_uniquely_sized_slices( - dlist_t *devices, - dlist_t **unique) -{ - int error = 0; - dlist_t *iter = NULL; - - for (iter = devices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dlist_t *iter1; - for (iter1 = devconfig_get_components((devconfig_t *)iter->obj); - (iter1 != NULL) && (error == 0); - iter1 = iter1->next) { - - devconfig_t *comp = (devconfig_t *)iter1->obj; - if (dlist_contains(*unique, comp, - compare_devconfig_sizes) != B_TRUE) { - - dlist_t *item = NULL; - if ((item = dlist_new_item(comp)) == NULL) { - error = ENOMEM; - } else { - *unique = dlist_insert_ordered(item, *unique, - ASCENDING, compare_devconfig_sizes); - } - } - } - } - - return (error); -} - -/* - * FUNCTION: remove_undersized_slices(dlist_t *unique, - * dlist_t **avail) - * - * INPUT: avail - pointer to a list of available slices - * unique - pointer to a list of uniquely size slices - * - * OUTPUT: avail - pointer to an updated list of available slices - * that are at least as large as slices in the - * unique list. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: filter available slices and remove those that aren't - * large enough for the device components which need spares. - * - * For each uniquely sized slice, find all available slices - * that are larger and add them to the filtered list. - */ -static int -remove_undersized_slices( - dlist_t *unique, - dlist_t **avail) -{ - dlist_t *filtered = NULL; - dlist_t *iter = NULL; - int error = 0; - - for (iter = unique; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *uslice = (devconfig_t *)iter->obj; - uint64_t usize = 0; - dlist_t *iter2 = NULL; - - error = devconfig_get_size(uslice, &usize); - - for (iter2 = *avail; - (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - - dm_descriptor_t aslice = (uintptr_t)iter2->obj; - uint64_t asize = 0; - - error = slice_get_size(aslice, &asize); - if (asize >= usize) { - - /* this slice is large enough */ - dlist_t *item = NULL; - if ((item = dlist_new_item((void *)(uintptr_t)aslice)) == - NULL) { - error = ENOMEM; - } else { - filtered = dlist_insert_ordered(item, filtered, - ASCENDING, compare_slice_sizes); - } - - } - } - } - - if (error == 0) { - dlist_free_items(*avail, NULL); - *avail = filtered; - } else { - dlist_free_items(filtered, NULL); - } - - return (error); -} - -/* - * FUNCTION: find_spare_for_component(devconfig_t *component, - * dlist_t *all_spares, dlist_t *hbas, dlist_t *disks, - * boolean_t *found) - * - * INPUT: comp - pointer to a devconfig_t slice compenent that - * needs to be spared - * all_spares - pointer to a list of spares currently - * in the pool or that will be added - * hbas - pointer to a list of HBAs the component's - * parent device utilizes - * disks - pointer to a list of disks the component's - * parent device utilizes - * - * OUTPUT: found - pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Find a spare for the input component. - * - * Searches the input list of spares to see if one is - * sufficient. - * - * A suffcient spare is one that is large enough to spare - * for the input component and not on the same disk as any - * of the components in the parent device. - * - * The optimal spare would be on a different controller/HBA - * as the component and any of the components in the parent - * device. We settle for sufficient. - */ -static int -find_spare_for_component( - devconfig_t *component, - dlist_t *all_spares, - dlist_t *hbas, - dlist_t *disks, - boolean_t *found) -{ - dlist_t *iter = NULL; - uint64_t csize = 0; - int error = 0; - - *found = B_FALSE; - - (void) devconfig_get_size(component, &csize); - - for (iter = all_spares; - (iter != NULL) && (*found == B_FALSE) && (error == 0); - iter = iter->next) { - - devconfig_t *spare = (devconfig_t *)iter->obj; - char *spname = NULL; - uint64_t spsize = 0; - - if (((error = devconfig_get_name(spare, &spname)) != 0) || - ((error = devconfig_get_size(spare, &spsize)) != 0)) { - continue; - } - - if (spsize >= csize) { - - dm_descriptor_t disk = NULL; - - /* see if spare's disk is independent of the volume */ - error = get_disk_for_named_slice(spname, &disk); - if ((error == 0) && (dlist_contains(disks, - (void *)(uintptr_t)disk, compare_descriptor_names) == - B_FALSE)) { - *found = B_TRUE; - } - } - } - - if ((*found == B_TRUE) && (get_max_verbosity() >= OUTPUT_DEBUG)) { - char *cname = NULL; - (void) devconfig_get_name(component, &cname); - oprintf(OUTPUT_DEBUG, - gettext(" found existing spare for: %s (%llu)\n"), - cname, csize); - } - - return (error); -} - -/* - * FUNCTION: choose_spare_for_component(devconfig_t *component, - * dlist_t *all_spares, dlist_t **new_spares, - * dlist_t avail, uint16_t npaths, dlist_t *used_hbas, - * dlist_t *used_disks) - * - * INPUT: comp - pointer to a devconfig_t slice compenent that - * needs to be spared - * all_spares - pointer to a list of spares currently - * in the pool and those to be added - * new_spares - pointer to a list of spares that need to - * be added to the pool - * avail - list of available slices - * npaths - required number of paths for the spare - * used_hbas - list of HBAs used by the component's parent - * used_disks - list of disks used by the component's parent - * - * OUTPUT: all_spares - the possibly updated list of all spares - * new_spares - the possibly updated list of spares which - * need to be added to the pool. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Find a new spare for the input component. - * - * Select a spare from the available slice list and add - * it to the new_spares list. - * - * The spare slice chosen should be on a unique HBA and - * disk relative to the input lists of used HBAs and disks - * and any spares in the pool. - */ -static int -choose_spare_for_component( - devconfig_t *component, - dlist_t **all_spares, - dlist_t **new_spares, - dlist_t **avail, - dlist_t *used_hbas, - dlist_t *used_disks, - uint16_t npaths) -{ - devconfig_t *spare = NULL; - uint64_t csize = 0; - int error = 0; - - (void) devconfig_get_size(component, &csize); - - if (get_max_verbosity() >= OUTPUT_DEBUG) { - char *cname = NULL; - (void) devconfig_get_name(component, &cname); - oprintf(OUTPUT_DEBUG, - gettext(" select new spare for: %s (%llu)\n"), - cname, csize); - } - - /* - * find a spare for the input component. - * select the best one from the available list that - * is on a unique disk. - */ - - /* - * 1st B_TRUE: require a different disk than those used by - * all spares and devices - * 2nd B_TRUE: requested size is the minimum acceptable - * 1st B_FALSE: do not add an extra cylinder when resizing slice, - * this is only necessary for Stripe components whose - * sizes get rounded down to an interlace multiple and - * then down to a cylinder boundary. - */ - error = choose_slice(csize, npaths, *avail, *all_spares, - used_hbas, used_disks, B_TRUE, B_TRUE, B_FALSE, &spare); - - if ((error == 0) && (spare == NULL)) { - /* can't find one on a unique disk, try again on any disk */ - - /* BEGIN CSTYLED */ - /* - * 1st B_FALSE: don't require a different disk than those used - * by all spares and devices - * 2nd B_TRUE: requested size is still the minimum acceptable - * 2nd B_FALSE: do not add an extra cylinder when resizing slice - * this is only necessary for Stripe components whose - * sizes get rounded down to an interlace multiple and - * then down to a cylinder boundary. - */ - /* END CSTYLED */ - error = choose_slice( - csize, npaths, *avail, *all_spares, used_hbas, - used_disks, B_FALSE, B_TRUE, B_FALSE, &spare); - } - - if ((error == 0) && (spare != NULL)) { - - dlist_t *rmvd = NULL; - dlist_t *item = NULL; - char *spname = NULL; - - if ((item = dlist_new_item(spare)) == NULL) { - error = ENOMEM; - } else { - - /* add spare to the all spares list */ - *all_spares = dlist_append(item, *all_spares, AT_HEAD); - - if ((item = dlist_new_item(spare)) == NULL) { - error = ENOMEM; - } else { - - /* add spare to the new spares list */ - *new_spares = dlist_insert_ordered( - item, *new_spares, ASCENDING, - compare_devconfig_sizes); - - /* remove it from the available list */ - *avail = dlist_remove_equivalent_item(*avail, spare, - compare_devconfig_and_descriptor_names, - &rmvd); - - if (rmvd != NULL) { - free(rmvd); - } - - /* add the spare to the used slice list */ - error = devconfig_get_name(spare, &spname); - if (error == 0) { - error = add_used_slice_by_name(spname); - } - } - } - - } else { - - /* no spare, give up on layout */ - oprintf(OUTPUT_TERSE, - gettext(" <---Failed: insufficient suitable spares\n")); - - volume_set_error( - gettext("failed to find sufficient spares for HSP\n")); - - error = -1; - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_hsp.h b/usr/src/cmd/lvm/metassist/layout/layout_hsp.h deleted file mode 100644 index 7c50cc921395..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_hsp.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_HSP_H -#define _LAYOUT_HSP_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" -#include "volume_dlist.h" - -extern int layout_hsp( - devconfig_t *request, - devconfig_t *hsprequest, - dlist_t *devices, - dlist_t **results); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_HSP_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_messages.c b/usr/src/cmd/lvm/metassist/layout/layout_messages.c deleted file mode 100644 index c652972b7bf9..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_messages.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#include "volume_error.h" -#include "volume_output.h" -#include "volume_string.h" - -#include "layout_messages.h" - -/* - * FUNCTION: print_layout_volume_msg(char *type, uint64_t nbytes) - * - * PURPOSE: Prints a generic message indicating the start of the - * layout process for a volume of the indicated type and - * capacity. - */ -void -print_layout_volume_msg( - char *type, - uint64_t nbytes) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_VERBOSE, - gettext(" ->Layout a %s with capacity %s\n"), - type, spstr); - - free(spstr); -} - -/* - * FUNCTION: print_layout_explicit_msg(char *type) - * - * PURPOSE: Prints a generic message indicating the start of the - * layout population process using explicit components - * for a volume of the indicated type. - */ -void -print_layout_explicit_msg( - char *type) -{ - oprintf(OUTPUT_TERSE, - gettext(" ->Layout a %s with explicitly specified " - "components\n"), - type); -} - -/* - * FUNCTION: print_layout_explicit_added_msg(char *comp) - * - * PURPOSE: Prints a generic message indicating the named component - * was added to a volume. - */ -void -print_layout_explicit_added_msg( - char *comp) -{ - oprintf(OUTPUT_TERSE, gettext(" ---->added '%s'\n"), comp); -} - -/* - * FUNCTION: print_success_msg() - * - * PURPOSE: Prints a generic layout success message. - */ -void -print_layout_success_msg() -{ - oprintf(OUTPUT_TERSE, gettext(" <-Success!\n")); -} - -/* - * FUNCTION: print_insufficient_resources_msg(char *type) - * - * PURPOSE: Prints a message indicating that there are insufficient - * resources. - * - * Also sets the metassist error string indicating why - * the metassist command failed. The volume type is included - * for context in this message. - */ -void -print_insufficient_resources_msg( - char *type) -{ - oprintf(OUTPUT_TERSE, - gettext(" <-Failed: insufficient resources available\n")); - - volume_set_error( - gettext("insufficient resources available to complete " - "requested %s\n"), - type); -} - -/* - * FUNCTION: print_insufficient_hbas_msg(int n) - * - * PURPOSE: Prints a status message indicating that there are insufficient - * HBAs and that only 'n' are available. - * - * Used to indicate strategy selection during layouts. - */ -void -print_insufficient_hbas_msg( - int n) -{ - if (n == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: no HBA has sufficient disks\n")); - } else if (n == 1) { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: only 1 HBA has sufficient disks\n")); - } else { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: only %d HBAs have sufficient disks\n"), - n); - } -} - -/* - * FUNCTION: print_insufficient_disks_msg(int n) - * - * PURPOSE: Prints a status message indicating that there are insufficient - * disks and that only 'n' are available. - * - * Used to indicate strategy selection during layouts. - */ -void -print_insufficient_disks_msg( - int n) -{ - if (n == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: no disks available\n"), - n); - } else if (n == 1) { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: only 1 disk available\n"), - n); - } else { - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: only %d disks available\n"), - n); - } -} - -/* - * FUNCTION: print_no_hbas_msg() - * - * PURPOSE: Prints a layout failure due to no usable HBAs message. - */ -void -print_no_hbas_msg() -{ - oprintf(OUTPUT_TERSE, - gettext(" There are no usable HBAs.\n")); -} - -/* - * FUNCTION: print_debug_failure_msg(char *type, char *err) - * - * PURPOSE: Prints a generic message for unexpected failures - * during layout. - */ -void -print_debug_failure_msg( - char *type, - char *err) -{ - oprintf(OUTPUT_DEBUG, - gettext(" layout of %s failed: %s\n"), - type, err); -} - -/* - * FUNCTION: print_insufficient_components_msg(int ncomp) - * - * INPUT: ncomp - number of available components - * - * PURPOSE: Helper to print out a message indicating that there - * are insufficient components for a volume, only ncomps - * are actually available. - */ -void -print_insufficient_components_msg( - int ncomp) -{ - oprintf(OUTPUT_VERBOSE, - gettext(" <---Failed: only found %d components\n"), ncomp); -} - -/* - * FUNCTION: print_hba_insufficient_space_msg(char *name, uint64_t nbytes) - * - * INPUT: name - a char * HBA name - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper to print out a message indicating the the HBA has - * insufficient space for use by the mirror layout strategy. - */ -void -print_hba_insufficient_space_msg( - char *name, - uint64_t nbytes) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_VERBOSE, - gettext(" <--Failed: '%s' only has %s available\n"), - name, spstr); - - free(spstr); -} - -/* - * FUNCTION: print_insufficient_capacity_msg(uint64_t nbytes) - * - * INPUT: nbytes - available capacity in bytes - * - * PURPOSE: Helper to print out a message indicating that there - * is insufficient space for a volume, only nbytes are - * actually available. - */ -void -print_insufficient_capacity_msg( - uint64_t nbytes) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_VERBOSE, - gettext(" <---Failed: only found %s capacity\n"), spstr); - - free(spstr); -} - -/* - * FUNCTION: print_layout_submirrors_msg(char *type, uint64_t nbytes, - * int nsubs) - * - * PURPOSE: Prints a generic status message indicating that layout of - * nsub submirrors of the indicated type and size has begun. - */ -void -print_layout_submirrors_msg( - char *type, - uint64_t nbytes, - int nsubs) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_TERSE, - gettext(" -->Layout %d %s submirrors with capacity %s\n"), - nsubs, type, spstr); - - free(spstr); -} - -/* - * FUNCTION: print_layout_submirrors_failed_msg(char *type, int count, - * int nsubs) - * - * PURPOSE: Prints a generic status message indicating that only count - * submirrors (out of nsubs) of the indicated type could be - * composed. - */ -void -print_layout_submirrors_failed_msg( - char *type, - int count, - int nsubs) -{ - if (count == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" <---Failed, no %s submirrors could " - "be composed.\n"), - type); - } else { - oprintf(OUTPUT_VERBOSE, - gettext(" <---Failed, only %d of %d %s submirror(s) " - "could be composed.\n"), - count, nsubs, type); - } -} - -/* - * FUNCTION: print_populate_volume_msg(char *type, uint64_t nbytes) - * - * PURPOSE: Prints a generic message indicating a population process - * for a volume of the indicated type and size is beginning. - */ -void -print_populate_volume_msg( - char *type, - uint64_t nbytes) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_TERSE, - gettext(" --->Populate a %s of capacity %s\n"), - type, spstr); - - free(spstr); -} - -/* - * FUNCTION: print_populate_volume_ncomps_msg(char *type, uint64_t nbytes, - * int ncomps) - * - * PURPOSE: Prints a generic message indicating a population process - * for a volume of the indicated type, size and number of - * components is beginning. - */ -void -print_populate_volume_ncomps_msg( - char *type, - uint64_t nbytes, - int ncomps) -{ - char *spstr = NULL; - - (void) bytes_to_sizestr(nbytes, &spstr, universal_units, B_FALSE); - - oprintf(OUTPUT_TERSE, - gettext(" --->Populate a %s of capacity %s (%d components)\n"), - type, spstr, ncomps); - - free(spstr); -} - -/* - * FUNCTION: print_populate_success_msg() - * - * PURPOSE: Prints a generic message indicating a population process - * completed successfully. - */ -void -print_populate_success_msg() -{ - oprintf(OUTPUT_TERSE, - gettext(" <---Success!\n")); -} - -/* - * FUNCTION: print_populate_choose_slices_msg() - * - * PURPOSE: Prints a generic message indicating a population process - * is beginning to choose slices. - */ -void -print_populate_choose_slices_msg() -{ - oprintf(OUTPUT_VERBOSE, - gettext(" choosing \"best\" slices from " - "those available...\n")); -} - -/* - * FUNCTION: print_populate_no_slices_msg() - * - * PURPOSE: Prints a layout failure due to no available slices message. - */ -void -print_populate_no_slices_msg() -{ - oprintf(OUTPUT_VERBOSE, - gettext(" <---Failed: there are no slices available.\n")); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_messages.h b/usr/src/cmd/lvm/metassist/layout/layout_messages.h deleted file mode 100644 index d9c7531b215a..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_messages.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_MESSAGES_H -#define _LAYOUT_MESSAGES_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * Functions to print out progress, status and error messages that - * are shared by layout_concat.c, layout_hsp.c, layout_mirror.c, - * layout_stripe.c - */ -extern void print_layout_success_msg(); -extern void print_layout_volume_msg(char *type, uint64_t nbytes); -extern void print_layout_explicit_msg(char *type); -extern void print_layout_explicit_added_msg(char *comp); -extern void print_layout_submirrors_msg(char *type, uint64_t nbytes, int nsubs); -extern void print_layout_submirrors_failed_msg(char *type, int count, - int nsubs); - -extern void print_populate_volume_msg(char *type, uint64_t nbytes); -extern void print_populate_volume_ncomps_msg(char *type, uint64_t nbytes, - int ncomps); -extern void print_populate_success_msg(); -extern void print_populate_choose_slices_msg(); -extern void print_populate_no_slices_msg(); - -extern void print_no_hbas_msg(); -extern void print_debug_failure_msg(); - -extern void print_insufficient_resources_msg(char *type); -extern void print_insufficient_hbas_msg(int n); -extern void print_insufficient_disks_msg(int n); -extern void print_hba_insufficient_space_msg(char *name, uint64_t nbytes); -extern void print_insufficient_capacity_msg(uint64_t nbytes); -extern void print_insufficient_components_msg(int ncomp); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_MESSAGES_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_mirror.c b/usr/src/cmd/lvm/metassist/layout/layout_mirror.c deleted file mode 100644 index e0d5f48bbe9a..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_mirror.c +++ /dev/null @@ -1,2413 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#include "volume_error.h" -#include "volume_dlist.h" -#include "volume_output.h" - -#include "layout_concat.h" -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_messages.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_stripe.h" -#include "layout_svm_util.h" - -#define _LAYOUT_MIRROR_C - -static int layout_stripe_submirrors( - devconfig_t *request, - dlist_t *cursubs, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results); - -static int layout_concat_submirrors( - devconfig_t *request, - dlist_t *cursubs, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results); - -static int compose_stripe_per_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - uint16_t ncomp, - uint16_t mincomp, - dlist_t **results); - -static int compose_stripes_across_hbas( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - dlist_t *disks, - uint64_t nbytes, - uint16_t nsubs, - uint16_t ncomp, - uint16_t mincomp, - dlist_t **results); - -static int compose_stripes_within_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - uint16_t ncomp, - uint16_t mincomp, - dlist_t **results); - -static int compose_concat_per_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results); - -static int compose_concats_across_hbas( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - dlist_t *disks, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results); - -static int compose_concats_within_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hba, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results); - -static int assemble_mirror( - devconfig_t *request, - dlist_t *subs, - devconfig_t **mirror); - -static int remove_used_disks( - dlist_t **disks, - devconfig_t *volume); - -static int volume_shares_disk( - dm_descriptor_t disk, - devconfig_t *volume, - boolean_t *bool); - -static int select_mpxio_hbas( - dlist_t *hbas, - dlist_t **mpxio_hbas); - -static int set_explicit_submirror_names( - dlist_t *reqs, - dlist_t *subs); - -static int set_explicit_submirror_name( - devconfig_t *req, - devconfig_t *sub); - -/* - * FUNCTION: layout_mirror(devconfig_t *request, nbytes, dlist_t **results) - * - * INPUT: request - pointer to a request devconfig_t - * nsubs - number of submirrors - * nbytes - desired mirror size - * - * OUTPUT: results - pointer to a list of volume devconfig_t results - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main driver to handle a mirror request that does not specify - * subcomponents. - * - * Striped submirrors are tried first, then concats. - */ -int -layout_mirror( - devconfig_t *request, - uint16_t nsubs, - uint64_t nbytes, - dlist_t **results) -{ - dlist_t *subs = NULL; - dlist_t *item = NULL; - boolean_t usehsp = B_FALSE; - int error = 0; - - if ((error = get_volume_faultrecov(request, &usehsp)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - error = 0; - } - - print_layout_volume_msg(devconfig_type_to_str(TYPE_MIRROR), nbytes); - - /* prefer stripe submirrors */ - if ((error = layout_stripe_submirrors( - request, NULL, nbytes, nsubs, &subs)) != 0) { - return (error); - } - - if (subs == NULL) { - /* second chance: mirrored concats */ - if ((error = layout_concat_submirrors( - request, NULL, nbytes, nsubs, &subs)) != 0) { - return (error); - } - } - - if (subs != NULL) { - - devconfig_t *mirror = NULL; - dlist_t *iter = NULL; - - /* unset submirror names prior to final assembly */ - for (iter = subs; iter != NULL; iter = iter->next) { - devconfig_t *sub = (devconfig_t *)iter->obj; - char *name = NULL; - - (void) devconfig_get_name(sub, &name); - release_volume_name(name); - (void) devconfig_set_name(sub, ""); - } - - error = assemble_mirror(request, subs, &mirror); - if (error == 0) { - - if ((item = dlist_new_item(mirror)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - - /* remember submirrors that need HSPs */ - if (usehsp == B_TRUE) { - error = add_to_hsp_list( - devconfig_get_components(mirror)); - } - - print_layout_success_msg(); - } - } else { - /* cleanup submirrors */ - dlist_free_items(subs, free_devconfig_object); - subs = NULL; - } - - } else if (error != 0) { - - print_debug_failure_msg(devconfig_type_to_str(TYPE_MIRROR), - get_error_string(error)); - - } else { - - print_insufficient_resources_msg( - devconfig_type_to_str(TYPE_MIRROR)); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: populate_explicit_mirror(devconfig_t *request, - * dlist_t **results) - * - * INPUT: request - pointer to a request devconfig_t - * - * OUTPUT: results - pointer to a list of volume devconfig_t results - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Processes the input mirror request specifying explicit layout - * constraints on the submirrors. - * - * Primary submirror constraint is explicit type, either - * stripe or concat. Submirror types may be mixed. - * - * Submirror sizes or components may be specified explicitly. - * - * If the mirror does not specify a size, assume the first explicit - * submirror size is the desired size. If a submirror does not - * specify a size or components, use the mirror size. - * - * Scan the submirror requests: those with specific components - * get assembled as encountered. The remainder are grouped by - * type and handled by layout_stripe_submirrors() or - * layout_concat_submirrors(). - * - * If all specified submirrors can be assembled, the final mirror - * is assembled and appended to the results list. - */ -int -populate_explicit_mirror( - devconfig_t *request, - dlist_t **results) -{ - dlist_t *composed = NULL; - dlist_t *list = NULL; - dlist_t *iter = NULL; - dlist_t *concats_by_size = NULL; - dlist_t *stripes_by_size = NULL; - int nsubs = 0; - int error = 0; - uint64_t msize = 0; - boolean_t usehsp = B_FALSE; - - list = devconfig_get_components(request); - nsubs = dlist_length(list); - - if ((error = get_volume_faultrecov(request, &usehsp)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - error = 0; - } - - if ((error = devconfig_get_size(request, &msize)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - msize = 0; - } else { - return (error); - } - } - - print_layout_explicit_msg(devconfig_type_to_str(TYPE_MIRROR)); - - /* - * Scan the list of specified submirrors, collect those that only - * specify size (or no size). Process those with explicit components - * immediately. - */ - composed = NULL; - for (iter = list; (iter != NULL) && (error == 0); iter = iter->next) { - - devconfig_t *comp = (devconfig_t *)iter->obj; - component_type_t ctype = TYPE_UNKNOWN; - dlist_t *clist = NULL; - uint64_t csize = 0; - dlist_t *item = NULL; - - (void) devconfig_get_type(comp, &ctype); - (void) devconfig_get_size(comp, &csize); - clist = devconfig_get_components(comp); - - if (clist != NULL) { - - /* components specified */ - - if (ctype == TYPE_STRIPE) { - error = populate_explicit_stripe(comp, &item); - } else { - error = populate_explicit_concat(comp, &item); - } - - if (error == 0) { - set_explicit_submirror_name( - comp, (devconfig_t *)item->obj); - composed = dlist_append(item, composed, AT_TAIL); - } - - } else { - - /* no components specified */ - - /* if no size is specified, it needs to be inferred */ - - if (msize == 0) { - /* mirror specified no size, first explicit submirror */ - /* size is assumed to be the desired mirror size */ - msize = csize; - } - if (csize == 0) { - /* this submirror specified no size, use mirror size */ - devconfig_set_size(comp, msize); - } - - if ((item = dlist_new_item(comp)) == NULL) { - error = ENOMEM; - break; - } - - if (ctype == TYPE_STRIPE) { - stripes_by_size = dlist_append( - item, stripes_by_size, AT_TAIL); - } else { - concats_by_size = dlist_append( - item, concats_by_size, AT_TAIL); - } - - } - } - - /* compose stripes specified by size */ - if ((error == 0) && (stripes_by_size != NULL)) { - uint16_t n = dlist_length(stripes_by_size); - dlist_t *stripes = NULL; - if ((error = layout_stripe_submirrors( - request, composed, msize, n, &stripes)) == 0) { - - /* adjust stripe names */ - set_explicit_submirror_names(stripes_by_size, stripes); - composed = dlist_append(stripes, composed, AT_TAIL); - - } else { - /* these stripes failed, skip concats_by_size */ - dlist_free_items(stripes, free_devconfig_object); - dlist_free_items(concats_by_size, NULL); - concats_by_size = NULL; - } - dlist_free_items(stripes_by_size, NULL); - } - - /* compose concats specified by size */ - if ((error == 0) && (concats_by_size != NULL)) { - uint16_t n = dlist_length(concats_by_size); - dlist_t *concats = NULL; - if ((error = layout_concat_submirrors( - request, composed, msize, n, &concats)) == 0) { - - /* adjust concat names */ - set_explicit_submirror_names(concats_by_size, concats); - composed = dlist_append(concats, composed, AT_TAIL); - - } else { - - /* these concats failed */ - dlist_free_items(concats, free_devconfig_object); - } - - dlist_free_items(concats_by_size, NULL); - } - - if ((composed != NULL) && ((dlist_length(composed) == nsubs))) { - - /* assemble final mirror */ - - devconfig_t *mirror = NULL; - dlist_t *item = NULL; - - if ((error = assemble_mirror(request, composed, &mirror)) == 0) { - - if ((item = dlist_new_item(mirror)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - if (usehsp == B_TRUE) { - error = add_to_hsp_list( - devconfig_get_components(mirror)); - } - print_layout_success_msg(); - } - } - - } else if (error != 0) { - - print_debug_failure_msg( - devconfig_type_to_str(TYPE_MIRROR), - get_error_string(error)); - - } else { - - dlist_free_items(composed, free_devconfig_object); - print_insufficient_resources_msg( - devconfig_type_to_str(TYPE_MIRROR)); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: assemble_mirror(devconfig_t *request, dlist_t *subs, - * devconfig_t **mirror) - * - * INPUT: request - pointer to a devconfig_t of the current request - * subs - pointer to a list of composed submirrors - * - * OUPUT: mirror - pointer to a devconfig_t to hold final mirror - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which creates and populates a mirror devconfig_t - * struct using information from the input request and the - * list of submirror components. - * - * Determines the name of the mirror either from the request - * or from the default naming scheme and assigns names to - * unnamed submirrors according to the default naming scheme. - * - * Sets the read and write strategies, and the resync pass - * number for the mirror if values are specified in the request. - * - * Attaches the input list of submirrors to the devconfig. - */ -static int -assemble_mirror( - devconfig_t *request, - dlist_t *subs, - devconfig_t **mirror) -{ - dlist_t *iter = NULL; - char *name = NULL; - int error = 0; - - if ((error = new_devconfig(mirror, TYPE_MIRROR)) == 0) { - /* set stripe name, use requested name if specified */ - if ((error = devconfig_get_name(request, &name)) != 0) { - if (error != ERR_ATTR_UNSET) { - volume_set_error(gettext("error getting requested name\n")); - } else { - error = 0; - } - } - - if (error == 0) { - if (name == NULL) { - if ((error = get_next_volume_name(&name, - TYPE_MIRROR)) == 0) { - error = devconfig_set_name(*mirror, name); - free(name); - /* get name for generating submirror names below */ - error = devconfig_get_name(*mirror, &name); - } - } else { - error = devconfig_set_name(*mirror, name); - } - } - } - - /* assign name to any unnamed submirror */ - for (iter = subs; - (error == 0) && (iter != NULL); - iter = iter->next) { - - devconfig_t *sub = (devconfig_t *)iter->obj; - char *subname = NULL; - - error = devconfig_get_name(sub, &subname); - if ((error == ERR_ATTR_UNSET) || (subname == NULL) || - (*subname == '\0')) { - ((error = get_next_submirror_name(name, &subname)) != 0) || - (error = devconfig_set_name(sub, subname)); - free(subname); - } - } - - if (error == 0) { - mirror_read_strategy_t read = 0; - if ((error = get_mirror_read_strategy(request, &read)) == 0) { - error = devconfig_set_mirror_read(*mirror, read); - } else if (error == ERR_ATTR_UNSET) { - error = 0; - } - } - - if (error == 0) { - mirror_write_strategy_t write = 0; - if ((error = get_mirror_write_strategy(request, &write)) == 0) { - error = devconfig_set_mirror_write(*mirror, write); - } else if (error == ERR_ATTR_UNSET) { - error = 0; - } - } - - if (error == 0) { - uint16_t pass = 0; - if ((error = get_mirror_pass(request, &pass)) == 0) { - error = devconfig_set_mirror_pass(*mirror, pass); - } else if (error == ERR_ATTR_UNSET) { - error = 0; - } - } - - /* arrange submirrors in ascending size order */ - if (error == 0) { - dlist_t *sorted = NULL; - dlist_t *next = NULL; - - iter = subs; - while (iter != NULL) { - - next = iter->next; - iter->next = NULL; - iter->prev = NULL; - - sorted = dlist_insert_ordered(iter, - sorted, ASCENDING, compare_devconfig_sizes); - - iter = next; - } - subs = sorted; - } - - if (error == 0) { - devconfig_set_components(*mirror, subs); - } else { - free_devconfig(*mirror); - *mirror = NULL; - } - - return (error); -} - -/* - * FUNCTION: layout_stripe_submirrors(devconfig_t *request, dlist_t *cursubs, - * uint64_t nbytes, uint16_t nsubs, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * these may affect disk and HBA choices for new - * submirrors being composed and are passed along - * into the component selection functions. - * nbytes - the desired capacity for the stripes - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main layout driver for composing stripe submirrors. - * - * Attempts to construct nsub submirrors of size nbytes. - * - * Several different layout strategies are tried in order - * of preference until one succeeds or there are none left. - * - * 1 - mirror with all stripes on the MPXIO "controller" - * . requires MPXIO to be enabled - * . requires nsubs * mincomp available disks on the - * MPXIO HBA - * - * 2 - mirror with stripes within separate HBAs of same type - * . requires nsubs HBAs with mincomp disks - * . stripe width is driven by number of disks on HBA - * - * 3 - mirror with stripes across HBAs of same type - * . requires mincomp HBAs with nsubs disks - * (each stripe has a disk per HBA) - * . stripe width is driven by number of HBAs - * - * 4 - mirror with stripes within separate HBAs of mixed type - * . requires nsubs HBAs with mincomp disks - * . stripe width is driven by number of disks on HBA - * - * 5 - mirror with stripes across HBAs of mixed type - * . requires mincomp HBAs with nsubs disks - * (each stripe has a disk per HBA) - * . stripe width is driven by number of HBAs - * - * 6 - mirror with all stripes within the same HBA - * . requires an HBA with mincomp * nsubs disks - * - * get available HBAs - * - * group HBAs by characteristics - * for (each HBA grouping) and (nsub stripes not composed) { - * select next HBA group - * for (strategy[1,2,3]) and (nsub stripes not composed) { - * compose nsub stripes using HBAs in group - * } - * } - * - * if (nsub stripes not composed) { - * for (strategy[4,5,6]) and (nsub stripes not composed) { - * compose nsub stripes using all HBAs - * } - * } - * - * if (all stripes composed) { - * append composed stripes to results - * } - * - */ -static int -layout_stripe_submirrors( - devconfig_t *request, - dlist_t *cursubs, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results) -{ - /* - * these enums define the # of strategies and the preference order - * in which they are tried - */ - typedef enum { - ALL_STRIPES_ON_MPXIO = 0, - STRIPE_PER_SIMILAR_HBA, - STRIPE_ACROSS_SIMILAR_HBAS, - N_SIMILAR_HBA_STRATEGIES - } similar_hba_strategy_order_t; - - typedef enum { - STRIPE_PER_ANY_HBA = 0, - STRIPE_ACROSS_ANY_HBAS, - STRIPE_WITHIN_ANY_HBA, - N_ANY_HBA_STRATEGIES - } any_hba_strategy_order_t; - - dlist_t *usable_hbas = NULL; - dlist_t *similar_hba_groups = NULL; - dlist_t *iter = NULL; - dlist_t *subs = NULL; - - boolean_t usehsp = B_FALSE; - uint16_t mincomp = 0; - uint16_t maxcomp = 0; - - int error = 0; - - (error = get_usable_hbas(&usable_hbas)); - if (error != 0) { - return (error); - } - - print_layout_submirrors_msg(devconfig_type_to_str(TYPE_STRIPE), - nbytes, nsubs); - - if (dlist_length(usable_hbas) == 0) { - print_no_hbas_msg(); - volume_set_error(gettext("There are no usable HBAs.")); - return (-1); - } - - similar_hba_groups = NULL; - ((error = group_similar_hbas(usable_hbas, &similar_hba_groups)) != 0) || - - /* - * determine the min/max number of stripe components - * based on the request, the diskset defaults or the - * global defaults. These are absolute limits, the - * actual values are determined by the number of HBAs - * and/or disks available. - */ - (error = get_stripe_min_comp(request, &mincomp)) || - (error = get_stripe_max_comp(request, &maxcomp)) || - (error = get_volume_faultrecov(request, &usehsp)); - if (error != 0) { - return (error); - } - - for (iter = similar_hba_groups; - (error == 0) && (subs == NULL) && (iter != NULL); - iter = iter->next) { - - dlist_t *hbas = (dlist_t *)iter->obj; - - similar_hba_strategy_order_t order; - - for (order = ALL_STRIPES_ON_MPXIO; - (order < N_SIMILAR_HBA_STRATEGIES) && - (subs == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case ALL_STRIPES_ON_MPXIO: - - if (is_mpxio_enabled() == B_TRUE) { - dlist_t *mpxio_hbas = NULL; - - /* see if any HBA supports MPXIO */ - error = select_mpxio_hbas(hbas, &mpxio_hbas); - if ((error == 0) && (mpxio_hbas != NULL)) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 1: use %d-%d MPXIO disks\n"), - mincomp * nsubs, maxcomp * nsubs); -/* END CSTYLED */ - - /* see if MPXIO HBA has enough disks */ - error = select_hbas_with_n_disks( - request, mpxio_hbas, (mincomp * nsubs), - &selhbas, &disks); - - if ((error == 0) && (dlist_length(selhbas) > 0)) { - error = compose_stripes_within_hba( - request, cursubs, mpxio_hbas, nbytes, - nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - dlist_free_items(mpxio_hbas, NULL); - } - - break; - - case STRIPE_PER_SIMILAR_HBA: - - error = select_hbas_with_n_disks( - request, hbas, mincomp, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 2: use %d-%d disks from %d similar HBAs - stripe per HBA\n"), - mincomp, maxcomp, nsubs); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= nsubs) { - error = compose_stripe_per_hba( - request, cursubs, selhbas, nbytes, - nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case STRIPE_ACROSS_SIMILAR_HBAS: - - error = select_hbas_with_n_disks( - request, hbas, nsubs, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 3: use %d disks from %d-%d similar HBAs - stripe across HBAs \n"), - nsubs, mincomp, maxcomp); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= mincomp) { - error = compose_stripes_across_hbas( - request, cursubs, selhbas, disks, - nbytes, nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - default: - break; - } - - dlist_free_items(selhbas, NULL); - dlist_free_items(disks, NULL); - } - } - - for (iter = similar_hba_groups; iter != NULL; iter = iter->next) { - dlist_free_items((dlist_t *)iter->obj, NULL); - } - dlist_free_items(similar_hba_groups, NULL); - - /* retry using all available HBAs */ - if (subs == NULL) { - - any_hba_strategy_order_t order; - - for (order = STRIPE_PER_ANY_HBA; - (order < N_ANY_HBA_STRATEGIES) && - (subs == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case STRIPE_PER_ANY_HBA: - - error = select_hbas_with_n_disks( - request, usable_hbas, nsubs, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 4: use %d-%d disks from any %d HBAs - stripe per HBA\n"), - mincomp, maxcomp, nsubs); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= nsubs) { - error = compose_stripe_per_hba( - request, cursubs, selhbas, nbytes, - nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case STRIPE_ACROSS_ANY_HBAS: - - error = select_hbas_with_n_disks( - request, usable_hbas, nsubs, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 5: use %d disks from %d-%d HBAs - stripe across HBAs \n"), - nsubs, mincomp, maxcomp); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= mincomp) { - error = compose_stripes_across_hbas( - request, cursubs, selhbas, disks, - nbytes, nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case STRIPE_WITHIN_ANY_HBA: - - error = select_hbas_with_n_disks( - request, usable_hbas, (mincomp * nsubs), - &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 6: use %d-%d disks from any single HBA - %d stripes within HBA\n"), - mincomp * nsubs, maxcomp * nsubs, nsubs); -/* END CSTYLED */ - if ((n = dlist_length(selhbas)) > 0) { - error = compose_stripes_within_hba( - request, cursubs, selhbas, nbytes, - nsubs, maxcomp, mincomp, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - default: - break; - } - - dlist_free_items(selhbas, NULL); - dlist_free_items(disks, NULL); - } - } - - if (error == 0) { - *results = dlist_append(subs, *results, AT_TAIL); - } - return (error); -} - -/* - * FUNCTION: layout_concat_submirrors(devconfig_t *request, dlist_t *cursubs, - * uint64_t nbytes, uint16_t nsubs, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * nbytes - the desired capacity for the concats - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main layout driver for composing concat submirrors. - * - * Attempts to construct nsub submirrors of size nbytes. - * - * Several different layout strategies are tried in order - * of preference until one succeeds or there are none left. - * - * 1 - mirror with all concats on the MPXIO "controller" - * . requires MPXIO to be enabled - * . requires nsubs available disks on the MPXIO HBA - * - * 2 - mirror with concats on separate HBAs of same type - * . requires nsubs HBAs with available disks - * - * 3 - mirror with concats across HBAs of same type - * . requires an HBA with at least 1 available disk - * - * 4 - mirror with concats on separate HBAs of mixed type - * . requires nsubs HBAs with available disks - * - * 5 - mirror with concats across HBAs of mixed type - * . requires an HBA with at least 1 available disk - * - * 6 - mirror with all concats on the same HBA - * . requires an HBA with at least nsubs available disks - * - * get available HBAs - * - * group HBAs by characteristics - * for (each HBA grouping) and (nsub concats not composed) { - * select next HBA group - * for (strategy[1,2,3]) and (nsub concats not composed) { - * compose nsub concats, nbytes in size - * } - * } - * - * if (nsub concats not composed) { - * for (strategy[4,5,6]) and (nsub concats not composed) { - * compose nsub concats, nbytes in size - * } - * } - * - * if (all concats composed) { - * append composed concats to results - * } - * - */ -static int -layout_concat_submirrors( - devconfig_t *request, - dlist_t *cursubs, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results) -{ - /* - * these enums define the # of strategies and the preference order - * in which they are tried - */ - typedef enum { - ALL_CONCATS_ON_MPXIO = 0, - CONCAT_PER_SIMILAR_HBA, - CONCAT_ACROSS_SIMILAR_HBAS, - N_SIMILAR_HBA_STRATEGIES - } similar_hba_strategy_order_t; - - typedef enum { - CONCAT_PER_ANY_HBA = 0, - CONCAT_ACROSS_ANY_HBAS, - CONCAT_WITHIN_ANY_HBA, - N_ANY_HBA_STRATEGIES - } any_hba_strategy_order_t; - - dlist_t *usable_hbas = NULL; - dlist_t *similar_hba_groups = NULL; - dlist_t *iter = NULL; - dlist_t *subs = NULL; - - boolean_t usehsp = B_FALSE; - - int error = 0; - - (error = get_usable_hbas(&usable_hbas)); - if (error != 0) { - return (error); - } - - print_layout_submirrors_msg(devconfig_type_to_str(TYPE_CONCAT), - nbytes, nsubs); - - if (dlist_length(usable_hbas) == 0) { - print_no_hbas_msg(); - volume_set_error(gettext("There are no usable HBAs.")); - return (-1); - } - - similar_hba_groups = NULL; - ((error = group_similar_hbas(usable_hbas, &similar_hba_groups)) != 0) || - (error = get_volume_faultrecov(request, &usehsp)); - if (error != 0) { - return (error); - } - - for (iter = similar_hba_groups; - (error == 0) && (subs == NULL) && (iter != NULL); - iter = iter->next) { - - dlist_t *hbas = (dlist_t *)iter->obj; - - similar_hba_strategy_order_t order; - - for (order = ALL_CONCATS_ON_MPXIO; - (order < N_SIMILAR_HBA_STRATEGIES) && - (subs == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case ALL_CONCATS_ON_MPXIO: - - if (is_mpxio_enabled() == B_TRUE) { - dlist_t *mpxio_hbas = NULL; - - /* see if any HBA supports MPXIO */ - error = select_mpxio_hbas(hbas, &mpxio_hbas); - if ((error == 0) && (mpxio_hbas != NULL)) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 1: use at least %d MPXIO disks\n"), - nsubs); -/* END CSTYLED */ - - /* see if MPXIO HBA has enough disks */ - error = select_hbas_with_n_disks( - request, hbas, nsubs, &selhbas, &disks); - - if ((error == 0) && - ((n = dlist_length(selhbas)) > 0)) { - error = compose_concats_within_hba( - request, cursubs, mpxio_hbas, nbytes, - nsubs, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - dlist_free_items(mpxio_hbas, NULL); - } - - break; - - case CONCAT_PER_SIMILAR_HBA: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 2: use any disks from %d similar HBAs - concat per HBA\n"), - nsubs); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= nsubs) { - error = compose_concat_per_hba( - request, cursubs, selhbas, - nbytes, nsubs, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case CONCAT_ACROSS_SIMILAR_HBAS: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 3: use any disks from any similar HBAs - " - "%d concats across HBAs\n"), - nsubs); -/* END CSTYLED */ - error = compose_concats_across_hbas( - request, cursubs, selhbas, disks, - nbytes, nsubs, &subs); - } - - break; - - default: - break; - } - - dlist_free_items(selhbas, NULL); - dlist_free_items(disks, NULL); - } - } - - for (iter = similar_hba_groups; iter != NULL; iter = iter->next) { - dlist_free_items((dlist_t *)iter->obj, NULL); - } - dlist_free_items(similar_hba_groups, NULL); - - /* retry using all available HBAs */ - if (subs == NULL) { - - any_hba_strategy_order_t order; - - for (order = CONCAT_PER_ANY_HBA; - (order < N_ANY_HBA_STRATEGIES) && - (subs == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case CONCAT_PER_ANY_HBA: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 4: use any disks from %d HBAs - concat per HBA\n"), - nsubs); -/* END CSTYLED */ - if ((n = dlist_length(selhbas)) >= nsubs) { - error = compose_concat_per_hba( - request, cursubs, selhbas, - nbytes, nsubs, &subs); - } else { - print_insufficient_hbas_msg(n); - } - } - break; - - case CONCAT_ACROSS_ANY_HBAS: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, - gettext(" -->Strategy 5: use any disks from any HBA - %d concats across HBAs\n"), - nsubs); -/* END CSTYLED */ - error = compose_concats_across_hbas( - request, cursubs, selhbas, disks, - nbytes, nsubs, &subs); - } - - break; - - case CONCAT_WITHIN_ANY_HBA: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 6: use any disks from any single HBA - %d concats within an HBA\n"), - nsubs); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) > 0) { - error = compose_concats_within_hba( - request, cursubs, selhbas, - nbytes, nsubs, &subs); - } else { - print_insufficient_hbas_msg(n); - } - - } - break; - - default: - break; - } - - dlist_free_items(selhbas, NULL); - dlist_free_items(disks, NULL); - } - } - - if (error == 0) { - *results = dlist_append(subs, *results, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: compose_stripe_per_hba(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, uint64_t nbytes, - * uint16_t nsubs, int maxcomp, int mincomp, - * dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * nbytes - the desired capacity for the stripes - * nsubs - the desired number of stripes - * maxcomp - the maximum number of stripe components - * mincomp - the minimum number of stripe components - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of stripes - * of the desired size using available disks on any of the HBAs - * from the input list. - * - * The number of components within the composed stripes will be - * in the range of mincomp to ncomp, preferring more components - * over fewer. All stripes composed by a single call to this - * function will have the same number of components. - * - * Each stripe will use disks from a single HBA. - * - * All input HBAs are expected to have at least mincomp available - * disks. - * - * If the stripes can be composed, they are appended to the list - * of result volumes. - * - * while (more HBAs and more stripes to compose) { - * select next HBA - * get available space for this HBA - * get available disks for this HBA - * if (not enough space or disks) { - * continue - * } - * - * use # disks as # of stripe components - limit to maxcomp - * for ((ncomps downto mincomp) && (more stripes to compose)) { - * while (more stripes to compose) { - * if a stripe can be composed using disks { - * save stripe - * increment stripe count - * } - * while (more HBAs and more stripes to compose) { - * select next HBA - * get available space for this HBA - * get available disks for this HBA - * if (not enough space or disks) { - * continue - * } - * if a stripe can be composed using disks { - * save stripe - * increment stripe count - * } - * } - * - * if (not all stripes composed) { - * delete any compose stripes - * } - * } - * } - * - * if (not all stripes composed) { - * delete any stripes composed - * } - * } - * - * if (not all stripes composed) { - * delete any stripes composed - * } - * - * append composed stripes to results - */ -static int -compose_stripe_per_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - uint16_t maxcomp, - uint16_t mincomp, - dlist_t **results) -{ - int error = 0; - dlist_t *list = NULL; - dlist_t *iter = NULL; - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Trying to compose %d Stripes with " - "%d-%d components on separate HBAs.\n"), - nsubs, mincomp, maxcomp); - - for (iter = hbas; - (list == NULL) && (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - dlist_t *disks = NULL; - uint64_t space = 0; - int ncomp = 0; - char *name; - - ((error = get_display_name(hba, &name)) != 0) || - (error = hba_get_avail_disks_and_space(request, - hba, &disks, &space)); - if (error != 0) { - continue; - } - - /* check for sufficient space and minimum # of disks */ - if (space < nbytes) { - (void) print_hba_insufficient_space_msg(name, space); - dlist_free_items(disks, NULL); - continue; - } - - if ((ncomp = dlist_length(disks)) < mincomp) { - print_insufficient_disks_msg(ncomp); - dlist_free_items(disks, NULL); - continue; - } - - /* make the stripe as wide as possible, up to maxcomp */ - for (ncomp = ((ncomp > maxcomp) ? maxcomp : ncomp); - (list == NULL) && (ncomp >= mincomp) && (error == 0); - ncomp--) { - - int count = 0; - - /* try composing nsubs stripes with ncomp components */ - while (count < nsubs) { - - devconfig_t *stripe = NULL; - dlist_t *item = NULL; - dlist_t *iter1 = NULL; - - /* build first stripe using disks on this HBA */ - if (((error = populate_stripe(request, nbytes, - disks, ncomp, cursubs, &stripe)) != 0) || - (stripe == NULL)) { - /* first stripe failed at the current width */ - /* break while loop and try a different width */ - break; - } - - /* composed a stripe */ - if ((item = dlist_new_item((void*)stripe)) == NULL) { - error = ENOMEM; - break; - } - ++count; - list = dlist_append(item, list, AT_TAIL); - - /* compose stripes on remaining HBAs */ - for (iter1 = iter->next; - (count < nsubs) && (iter1 != NULL) && (error == 0); - iter1 = iter1->next) { - - dm_descriptor_t hba1 = (uintptr_t)iter1->obj; - uint64_t space1 = 0; - dlist_t *disks1 = NULL; - - error = hba_get_avail_disks_and_space(request, - hba1, &disks1, &space1); - if (error != 0) { - continue; - } - - /* enough space/disks on this HBA? */ - if ((dlist_length(disks1) < ncomp) || - (space1 < nbytes)) { - dlist_free_items(disks1, NULL); - continue; - } - - stripe = NULL; - error = populate_stripe( - request, nbytes, disks1, - ncomp, cursubs, &stripe); - - if (stripe != NULL) { - /* prepare to compose another */ - if ((item = dlist_new_item( - (void *)stripe)) == NULL) { - error = ENOMEM; - break; - } - list = dlist_append(item, list, AT_TAIL); - ++count; - } - - dlist_free_items(disks1, NULL); - disks1 = NULL; - } - - if ((iter1 == NULL) && (count < nsubs)) { - /* - * no HBAs remain and haven't composed - * enough stripes at the current width. - * break while loop and try another width. - */ - break; - } - } - - if (count < nsubs) { - /* - * stripe composition at current width failed... - * prepare to try a narrower width. - * NB: narrower widths may work since some HBA(s) - * may have fewer available disks - */ - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_STRIPE), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - } - } - - dlist_free_items(disks, NULL); - disks = NULL; - } - - if (error == 0) { - *results = dlist_append(list, *results, AT_TAIL); - } else { - dlist_free_items(list, free_devconfig_object); - } - - return (error); -} - -/* - * FUNCTION: compose_stripes_across_hbas(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, dlist_t *disks, - * uint64_t nbytes, uint16_t nsubs, int maxcomp, - * int mincomp, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * disks - pointer to a list of available disks on the HBAs - * nbytes - the desired capacity for the stripes - * nsubs - the desired number of stripes - * ncomp - the maximum number of stripe components - * mincomp - the minimum number of stripe components - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of stripes - * of the desired size using available disks on any of the HBAs - * from the input list. - * - * The number of components within the composed stripes will be - * in the range of mincomp to ncomp, preferring more components - * over fewer. All stripes composed by a single call to this - * function will have the same number of components. - * - * Each stripe will use a disk from several different HBAs. - * - * All input HBAs are expected to have at least nsubs available - * disks. - * - * If the stripes can be composed, they are appended to the list - * of result volumes. - * - * for (ncomps downto mincomp) { - * - * copy the input disk list - * while (more stripes to compose) { - * if a stripe can be composed using disks { - * save stripe - * remove used disks from disk list - * increment stripe count - * } else - * end while loop - * } - * - * free copied disk list - * if (not all stripes composed) { - * delete any stripes composed - * decrement ncomps - * } - * } - * - * if (not all stripes composed) { - * delete any stripes composed - * } - * - * append composed stripes to results - */ -static int -compose_stripes_across_hbas( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - dlist_t *disks, - uint64_t nbytes, - uint16_t nsubs, - uint16_t ncomp, - uint16_t mincomp, - dlist_t **results) -{ - int error = 0; - int count = 0; - - dlist_t *list = NULL; - - while ((ncomp >= mincomp) && (count < nsubs) && (error == 0)) { - - dlist_t *iter; - dlist_t *item; - dlist_t *disks_copy = NULL; - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Trying to compose %d Stripes with " - "%d components across %d HBAs.\n"), - nsubs, ncomp, dlist_length(hbas)); - - /* copy disk list, it is modified by the while loop */ - for (iter = disks; iter != NULL; iter = iter->next) { - if ((item = dlist_new_item(iter->obj)) == NULL) { - error = ENOMEM; - } else { - disks_copy = dlist_append(item, disks_copy, AT_HEAD); - } - } - - /* compose nsubs stripe submirrors of ncomp components */ - while ((count < nsubs) && (error == 0)) { - - devconfig_t *stripe = NULL; - dlist_t *item = NULL; - - error = populate_stripe( - request, nbytes, disks_copy, ncomp, cursubs, &stripe); - - if ((error == 0) && (stripe != NULL)) { - if ((item = dlist_new_item((void *)stripe)) == NULL) { - error = ENOMEM; - } else { - ++count; - list = dlist_append(item, list, AT_TAIL); - error = remove_used_disks(&disks_copy, stripe); - } - } else if (stripe == NULL) { - break; - } - } - - /* free copy of disk list */ - dlist_free_items(disks_copy, NULL); - disks_copy = NULL; - - if ((error == 0) && (count < nsubs)) { - /* failed to compose enough stripes at this width, */ - /* prepare to try again with the next narrower width. */ - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_STRIPE), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - count = 0; - --ncomp; - } - } - - if (count < nsubs) { - dlist_free_items(list, free_devconfig_object); - list = NULL; - } else { - *results = dlist_append(list, *results, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: compose_stripes_within_hba(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, uint64_t nbytes, - * uint16_t nsubs, int maxcomp, int mincomp, - * dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * nbytes - the desired capacity for the stripes - * nsubs - the desired number of stripes - * maxcomp - the maximum number of stripe components - * mincomp - the minimum number of stripe components - * nsubs - the number of stripes to be composed - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of stripes - * of the desired size using available disks within any single - * HBA from the input list. - * - * The number of components within the composed stripes will be - * in the range of mincomp to maxcomp, preferring more components - * over fewer. All stripes composed by a single call to this - * function will have the same number of components. - * - * All stripes will use disks from a single HBA. - * - * All input HBAs are expected to have at least nsubs * mincomp - * available disks and total space sufficient for subs stripes. - * - * If the stripes can be composed, they are appended to the list - * of result volumes. - * - * while (more HBAs and more stripes need to be composed) { - * select next HBA - * if (not enough available space on this HBA) { - * continue; - * } - * get available disks for HBA - * use # disks as # of stripe components - limit to maxcomp - * for (ncomps downto mincomp) { - * if ((ncomps * nsubs) > ndisks) { - * continue; - * } - * while (more stripes need to be composed) { - * if a stripe can be composed using disks { - * save stripe - * remove used disks from disk list - * } else - * end while loop - * } - * if (not all stripes composed) { - * delete any stripes composed - * } - * } - * } - * - * if (not all stripes composed) { - * delete any stripes composed - * } - * - * append composed stripes to results - */ -static int -compose_stripes_within_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - uint16_t maxcomp, - uint16_t mincomp, - dlist_t **results) -{ - int error = 0; - int count = 0; - - dlist_t *list = NULL; - dlist_t *iter = NULL; - - for (iter = hbas; - (count < nsubs) && (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - uint64_t space = 0; - dlist_t *disks = NULL; - int ndisks = 0; - int ncomp = 0; - char *name = NULL; - - ((error = get_display_name(hba, &name)) != 0) || - (error = hba_get_avail_disks_and_space(request, - hba, &disks, &space)); - if (error != 0) { - dlist_free_items(disks, NULL); - continue; - } - - if (space < (nsubs * nbytes)) { - (void) print_hba_insufficient_space_msg(name, space); - dlist_free_items(disks, NULL); - continue; - } - - ndisks = dlist_length(disks); - - /* - * try composing stripes from ncomp down to mincomp. - * stop when nsubs stripes have been composed, or when the - * minimum stripe width has been tried - */ - for (ncomp = maxcomp; - (ncomp >= mincomp) && (count != nsubs) && (error == 0); - ncomp--) { - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Trying to compose %d Stripes with " - "%d components on a single HBA.\n"), - nsubs, ncomp); - - if (ndisks < (ncomp * nsubs)) { - print_insufficient_disks_msg(ndisks); - continue; - } - - /* try composing nsubs stripes, each ncomp wide */ - for (count = 0; (count < nsubs) && (error == 0); count++) { - - devconfig_t *stripe = NULL; - - error = populate_stripe( - request, nbytes, disks, ncomp, cursubs, &stripe); - - if ((error == 0) && (stripe != NULL)) { - - dlist_t *item = dlist_new_item((void *)stripe); - if (item == NULL) { - error = ENOMEM; - } else { - list = dlist_append(item, list, AT_TAIL); - error = remove_used_disks(&disks, stripe); - } - } else if (stripe == NULL) { - break; - } - } - - if (count < nsubs) { - /* failed to compose enough stripes at this width, */ - /* prepare to try again with fewer components */ - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_STRIPE), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - } - } - - dlist_free_items(disks, NULL); - } - - if (count < nsubs) { - dlist_free_items(list, free_devconfig_object); - list = NULL; - } - - *results = list; - - return (error); -} - -/* - * FUNCTION: compose_concats_per_hba(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, uint64_t nbytes, - * uint16_t nsubs, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * nbytes - the desired capacity for the concats - * nsubs - the number of concats to be composed - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of concats - * of the desired size using available disks within HBAs from the - * input list. Each concat will be composed using disks from a - * single HBA. - * - * If the concats can be composed, they are appended to the list - * of result volumes. - * - * while (more HBAs AND more concats need to be composed) { - * if (not enough available space on this HBA) { - * continue; - * } - * - * get available disks for HBA - * if (concat can be composed) { - * save concat - * increment count - * } - * } - * - * if (not all stripes composed) { - * delete any concats composed - * } - * - * append composed concats to results - */ -static int -compose_concat_per_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results) -{ - int error = 0; - int count = 0; - - dlist_t *list = NULL; - dlist_t *iter = NULL; - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Trying to compose %d Concats on " - "separate HBAs.\n"), nsubs); - - for (iter = hbas; - (iter != NULL) && (error == 0) && (count < nsubs); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - uint64_t space = 0; - devconfig_t *concat = NULL; - dlist_t *disks = NULL; - - error = hba_get_avail_disks_and_space(request, hba, &disks, &space); - if ((error == 0) && (space >= nbytes)) { - error = populate_concat( - request, nbytes, disks, cursubs, &concat); - } - - if ((error == 0) && (concat != NULL)) { - dlist_t *item = dlist_new_item((void *)concat); - if (item == NULL) { - error = ENOMEM; - } else { - ++count; - list = dlist_append(item, list, AT_TAIL); - } - } - - dlist_free_items(disks, NULL); - } - - if (count != nsubs) { - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_CONCAT), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - } else { - *results = dlist_append(list, *results, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: compose_concats_across_hbas(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, dlist_t *disks, - * uint64_t nbytes, uint16_t nsubs, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * disks - pointer to a list of available disks on the HBAs - * nbytes - the desired capacity for the concats - * nsubs - the number of concats to be composed - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of concats - * of the desired size using any available disks from the input - * list of available HBAs. - * - * If the concats can be composed, they are appended to the list - * of result volumes. - * - * copy the input disk list - * while (more concats need to be composed) { - * if (a concat can be composed using remaining disks) { - * save concat - * remove used disks from disk list - * increment count - * } else { - * end while loop - * } - * } - * - * if (not all concats composed) { - * delete any concats composed - * } - * - * append composed concats to results - */ -static int -compose_concats_across_hbas( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - dlist_t *disks, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results) -{ - int error = 0; - int count = 0; - - dlist_t *list = NULL; - dlist_t *item = NULL; - dlist_t *iter = NULL; - dlist_t *disks_copy = NULL; - - /* copy disk list, it is modified by the while loop */ - for (iter = disks; iter != NULL; iter = iter->next) { - if ((item = dlist_new_item(iter->obj)) == NULL) { - error = ENOMEM; - } else { - disks_copy = dlist_append(item, disks_copy, AT_HEAD); - } - } - - while ((count < nsubs) && (error == 0)) { - - devconfig_t *concat = NULL; - - error = populate_concat( - request, nbytes, disks_copy, cursubs, &concat); - - if ((error == 0) && (concat != NULL)) { - - item = dlist_new_item((void *)concat); - if (item == NULL) { - error = ENOMEM; - } else { - count++; - list = dlist_append(item, list, AT_TAIL); - error = remove_used_disks(&disks_copy, concat); - } - } else if (concat == NULL) { - break; - } - } - - /* free copy of disk list */ - dlist_free_items(disks_copy, NULL); - disks_copy = NULL; - - if (count != nsubs) { - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_CONCAT), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - } else { - *results = dlist_append(list, *results, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: compose_concats_within_hba(devconfig_t *request, - * dlist_t *cursubs, dlist_t *hbas, uint64_t nbytes, - * uint16_t nsubs, dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * cursubs - pointer to a list of already composed submirrors - * hbas - pointer to a list of available HBAs - * nbytes - the desired capacity for the concats - * nsubs - the number of concats to be composed - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which composes the requested number of concats - * of the desired size using available disks within any single - * HBA from the input list. - * - * - * HBAs in the list are expected to have at least 2 available - * disks and total space sufficient for the submirrors. - * - * If the concats can be composed, they are appended to the list - * of result volumes. - * - * while (more HBAs) { - * if (not enough available space on this HBA) { - * continue; - * } - * - * get available disks for HBA - * while (more concats need to be composed) { - * if a concat can be composed using disks { - * save concat - * remove used disks from disk list - * increment count - * } else { - * delete any concats composed - * end while loop - * } - * } - * } - * - * if (not all concats composed) { - * delete any concats composed - * } - * - * append composed concats to results - */ -static int -compose_concats_within_hba( - devconfig_t *request, - dlist_t *cursubs, - dlist_t *hbas, - uint64_t nbytes, - uint16_t nsubs, - dlist_t **results) -{ - int error = 0; - - dlist_t *iter = NULL; - dlist_t *list = NULL; - int count = 0; - - oprintf(OUTPUT_VERBOSE, - gettext(" --->Trying to compose %d Concats within " - "a single HBA.\n"), nsubs); - - for (iter = hbas; - (count < nsubs) && (error == 0) && (iter != NULL); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - dlist_t *disks = NULL; - uint64_t space = 0; - - error = hba_get_avail_disks_and_space(request, hba, &disks, &space); - if ((error == 0) && (space >= (nsubs * nbytes))) { - - /* try composing nsubs concats all on this HBA */ - count = 0; - while ((count < nsubs) && (error == 0)) { - devconfig_t *concat = NULL; - dlist_t *item = NULL; - - error = populate_concat( - request, nbytes, disks, cursubs, &concat); - - if ((error == 0) && (concat != NULL)) { - item = dlist_new_item((void*)concat); - if (item == NULL) { - error = ENOMEM; - } else { - count++; - list = dlist_append(item, list, AT_TAIL); - error = remove_used_disks(&disks, concat); - } - } else if (concat == NULL) { - dlist_free_items(list, free_devconfig_object); - list = NULL; - break; - } - } - } - - dlist_free_items(disks, NULL); - } - - if (count < nsubs) { - print_layout_submirrors_failed_msg( - devconfig_type_to_str(TYPE_CONCAT), - count, nsubs); - - dlist_free_items(list, free_devconfig_object); - list = NULL; - } else { - *results = dlist_append(list, *results, AT_TAIL); - } - - return (error); -} - -/* - * FUNCTION: remove_used_disks(dlist_t **disks, devconfig_t *volume) - * - * INPUT: disks - pointer to a list of disks - * volume - pointer to a devconfig_t volume - * - * OUPUT: disks - pointer to new list of disks - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which updates the input list of disks by removing - * those which have slices used by the input volume. - * - * Constructs a new list containing only disks not used by - * the volume. - * - * The original list is freed. - */ -static int -remove_used_disks( - dlist_t **disks, - devconfig_t *volume) -{ - dlist_t *list = NULL; - dlist_t *iter = NULL; - dlist_t *item = NULL; - int error = 0; - - for (iter = *disks; (iter != NULL) && (error == 0); iter = iter->next) { - - dm_descriptor_t diskp = (uintptr_t)iter->obj; - boolean_t shares = B_FALSE; - - error = volume_shares_disk(diskp, volume, &shares); - if ((error == 0) && (shares != B_TRUE)) { - /* disk is unused */ - if ((item = dlist_new_item((void*)(uintptr_t)diskp)) == NULL) { - error = ENOMEM; - } else { - list = dlist_append(item, list, AT_TAIL); - } - } - } - - if (error != 0) { - dlist_free_items(list, NULL); - } else { - - /* free original disk list, return new list */ - dlist_free_items(*disks, NULL); - - *disks = list; - } - - return (error); -} - -/* - * FUNCTION: volume_shares_disk(dm_descriptor_t disk, - * devconfig_t *volume, boolean_t *shares) - * - * INPUT: disk - a dm_descriptor_t handle for the disk of interest - * volume - a devconfig_t pointer to a volume - * bool - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input disk has a slice that is used - * as a component by the input volume. - * - * If the disk contributes a slice component, bool is set - * to B_TRUE, B_FALSE otherwise. - */ -static int -volume_shares_disk( - dm_descriptor_t disk, - devconfig_t *volume, - boolean_t *shares) -{ - dlist_t *iter = NULL; - int error = 0; - - *shares = B_FALSE; - - /* look at all slices in the volume */ - for (iter = devconfig_get_components(volume); - (iter != NULL) && (*shares == B_FALSE) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - - if (devconfig_isA(dev, TYPE_SLICE)) { - - /* get disk for volume's slice */ - dm_descriptor_t odisk = NULL; - char *oname = NULL; - - ((error = devconfig_get_name(dev, &oname)) != 0) || - (error = get_disk_for_named_slice(oname, &odisk)); - - if (error == 0) { - if (compare_descriptor_names( - (void*)(uintptr_t)disk, (void*)(uintptr_t)odisk) == 0) { - /* otherslice is on same disk, stop */ - *shares = B_TRUE; - } - } - } - } - - return (error); -} - -/* - * FUNCTION: select_mpxio_hbas(dlist_t *hbas, dlist_t **mpxio_hbas) - * - * INPUT: hbas - pointer to a list of dm_descriptor_t HBA handles - * - * OUTPUT: mpxio_hbas - pointer to a new list of containing HBAs that - * are multiplex enabled. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Iterates the input list of HBAs and builds a new list - * containing those that are multiplex enabled. - * - * The output list should be passed to dlist_free_items() - * when no longer needed. - */ -static int -select_mpxio_hbas( - dlist_t *hbas, - dlist_t **mpxio_hbas) -{ - dlist_t *iter; - int error = 0; - - for (iter = hbas; (iter != NULL) && (error == 0); iter = iter->next) { - dm_descriptor_t hba = (uintptr_t)iter->obj; - boolean_t ismpxio = B_FALSE; - if ((error = hba_is_multiplex(hba, &ismpxio)) == 0) { - if (ismpxio == B_TRUE) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)hba); - if (item != NULL) { - *mpxio_hbas = - dlist_append(item, *mpxio_hbas, AT_TAIL); - } else { - error = ENOMEM; - } - } - } - } - - if (error != 0) { - dlist_free_items(*mpxio_hbas, NULL); - *mpxio_hbas = NULL; - } - - return (error); -} - -/* - * FUNCTION: set_explicit_submirror_names(dlist_t *reqs, dlist_t *subs) - * - * INPUT: reqs - pointer to a list of request devconfig_ts - * subs - pointer to a list of volume devconfig_ts - * - * SIDEEFFECT: Modifies the volume names. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Iterates the lists of volumes and requests and calls - * set_explicit_mirror_name for each pair. - */ -static int -set_explicit_submirror_names( - dlist_t *reqs, - dlist_t *subs) -{ - int error = 0; - - while ((reqs != NULL) && (subs != NULL) && (error == 0)) { - - error = set_explicit_submirror_name( - (devconfig_t *)reqs->obj, - (devconfig_t *)subs->obj); - - reqs = reqs->next; - subs = subs->next; - } - - return (error); -} - -/* - * FUNCTION: set_explicit_submirror_name(dlist_t *req, dlist_t *sub) - * - * INPUT: req - pointer to a request devconfig_t - * sub - pointer to a volume devconfig_t - * - * SIDEEFFECT: Modifies the volume name. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Clears the volume's current name and returns the name - * to the available pool. - * - * If a name is specified in the request, the name is used - * as the volume's name. - * - * (Unnamed submirrors will have default names assigned - * during final mirror assembly.) - */ -static int -set_explicit_submirror_name( - devconfig_t *req, - devconfig_t *sub) -{ - char *name = NULL; - int error = 0; - - /* unset current submirror name */ - (void) devconfig_get_name(sub, &name); - release_volume_name(name); - (void) devconfig_set_name(sub, ""); - - if (devconfig_get_name(req, &name) != ERR_ATTR_UNSET) { - (void) devconfig_set_name(sub, name); - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_mirror.h b/usr/src/cmd/lvm/metassist/layout/layout_mirror.h deleted file mode 100644 index 36c5b5a8bf51..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_mirror.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_MIRROR_H -#define _LAYOUT_MIRROR_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" -#include "volume_dlist.h" - -extern int layout_mirror( - devconfig_t *request, - uint16_t nsubs, - uint64_t nbytes, - dlist_t **results); - -extern int populate_explicit_mirror( - devconfig_t *req, - dlist_t **results); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_MIRROR_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_request.c b/usr/src/cmd/lvm/metassist/layout/layout_request.c deleted file mode 100644 index 2ba31a2d3193..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_request.c +++ /dev/null @@ -1,3419 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include - -#include "volume_error.h" -#include "volume_defaults.h" -#include "volume_dlist.h" -#include "volume_output.h" -#include "volume_request.h" - -#include "layout_device_cache.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_validate.h" - -#define _LAYOUT_REQUEST_C - -static char *_request_diskset = NULL; -static devconfig_t *_toplevel_request = NULL; -static defaults_t *_defaults = NULL; - -/* - * This file contains code which handles various aspects of the - * request and defaults devconfig_t structs passed to the layout - * module. - * - * Functions are provided which determine what devices are available - * for use by the various volume layout mechanisms. These are based - * on the user specified available/unavailable devices included in - * a request or in the defaults associated with the destination diskset. - */ - -/* - * A struct to hold device "specifications" extracted from a user - * specified device name. This struct is used to compare the user's - * available and unavailable device specifications against physical - * devices attached to the system. - * - * The spec struct holds one of two different specifications: if the - * user supplied device name is parsable as a CTD name, it is parsed - * into the component ids. Otherwise, it is stored as is. - * - * The CTD name space implies a device hierarchy and metassist - * supports an implied wildcarding scheme for the CTD name space. - * A CTD specification from the user is of the form cX, cXdX, - * cXdXsX, cXtX, cXtXdX, or cXtXdXsX, so it may or may nor - * correspond to an individual physical device depending on - * the context. - * - * For example, "c1" can mean the controller/HBA with the - * name "c1" or it can mean all devices attached to the - * controller named "c1". - * - * The ctd specs make matching physical devices against a - * user specification easier since the matching is based on - * the numeric values extracted from the cXtXdXsX string - * and not on the strings themselves. The strings are - * troublesome because of situations like "c1" being - * compared to "c11t1d0s0" and getting false matches. - * - * The ID_UNSPECIFIED value is used to flag components - * that were not in the CTD name: - * - * "c3" -> { ctrl=3, target=ID_UNSPECIFIED, - * lun=ID_UNSPECIFIED, slice=ID_UNSPECIFIED } - * - * "c3t2" -> { ctrl=3, target=2, - * lun=ID_UNSPECIFIED, slice=ID_UNSPECIFIED } - */ - -#define ID_UNSPECIFIED -1 -typedef struct { - int ctrl; - int target; - int lun; - int slice; - boolean_t is_ide; -} ctd_spec_t; - -typedef enum { - SPEC_TYPE_CTD = 0, - SPEC_TYPE_RAW, - SPEC_TYPE_OTHER -} spec_type_t; - -typedef struct { - spec_type_t type; - union { - ctd_spec_t *ctd; - char *raw; - } data; -} device_spec_t; - -static int get_spec_for_name( - char *name, - device_spec_t **id); - -static int create_device_spec( - char *name, - device_spec_t **spec); - -static int create_device_ctd_spec( - char *name, - device_spec_t **spec); - -static int create_device_raw_spec( - char *name, - device_spec_t **spec); - -static void destroy_device_spec( - device_spec_t *spec); - -static boolean_t ctd_spec_includes_device( - device_spec_t *spec, - device_spec_t *device); - -static boolean_t raw_spec_includes_device( - device_spec_t *spec, - device_spec_t *device); - -/* - * get_spec_for_name builds up a cached mapping of device - * names to the corresponding device_spec_t structs. - * - * This saves repeatedly converting the device names, which - * could get expensive since devices are checked against the - * user specified available/unavailable devices a lot. - * - * The cache is implemented as a list of these structs: - */ -typedef struct { - - char *name; - device_spec_t *device_spec; - -} spec_cache_t; - -static dlist_t *_spec_cache = NULL; - -static int destroy_spec_cache(); -static int compare_name_to_spec_cache_name( - void *name, void *list_item); - -/* - * The user specified available/unavailable devices are - * accessed frequently during layout. To make this more - * efficient, the char *arrays of available/unavailable - * specifications for a request or defaults devconfig_t - * object are converted to device_spec_ts the first time - * they're accessed and then cached using this struct: - */ -typedef struct { - - devconfig_t *request; - - /* - * avail_specs_list is a list of device spec_t - * corresponding to available devices specified - * in the request object - */ - dlist_t *avail_specs_list; - - /* - * unavail_specs_list is a list of device spec_t - * corresponding to unavailable devices specified - * in the request object - */ - dlist_t *unavail_specs_list; - -} request_spec_list_t; - -dlist_t *_request_spec_list_cache = NULL; - -static int destroy_request_spec_list_cache(); -static void destroy_request_spec_list_entry(void *obj); - -static int compare_request_to_request_spec_list_request( - void *object, - void *list_item); - -static int convert_usernames_to_specs( - char **specs, - dlist_t **list); - -/* other private functions */ -static int is_device_avail( - dm_descriptor_t desc, - devconfig_t *request, - boolean_t *avail); - -static int is_named_device_avail( - devconfig_t *request, - char *device_name, - boolean_t check_aliases, - boolean_t *avail); - -static int avail_list_includes_device_name( - dlist_t *list, - char *device_name, - boolean_t check_aliases, - boolean_t *includes); - -static int unavail_list_includes_device_name( - dlist_t *list, - char *device_name, - boolean_t check_aliases, - boolean_t *includes); - -static int spec_includes_device_name( - device_spec_t *spec, - char *device_name, - boolean_t check_aliases, - boolean_t *includes); - -static boolean_t spec_includes_device( - device_spec_t *spec, - device_spec_t *device); - -static int disk_get_avail_space( - devconfig_t *request, - dm_descriptor_t disk, - uint64_t *avail); - -static int compare_hba_n_avail_disks( - void *obj1, - void *obj2); - -/* - * FUNCTION: release_request_caches() - * - * RETURNS: 0 - * - * PURPOSE: cleanup the module private caches. - */ -int -release_request_caches() -{ - (void) destroy_request_spec_list_cache(); - (void) destroy_spec_cache(); - - return (0); -} -/* - * FUNCTION: int set_request_diskset(char *) - * - * INPUT: char * - pointer to the diskset name - * OUTPUT: 0 - success - * !0 - validation failure - * RETURNS: - * - * PURPOSE: set the module global diskset name. - */ -int -set_request_diskset( - char *dsname) -{ - _request_diskset = dsname; - - if (dsname == NULL || dsname[0] == '\0') { - volume_set_error( - gettext("No disk set specified in request\n")); - return (-1); - } - - return (0); -} - -/* - * FUNCTION: char *get_request_diskset() - * - * INPUT: none - - * OUTPUT: none - - * RETURNS: char * - pointer to the currently set diskset name - * - * PURPOSE: get the global name of the current diskset. - */ -char * -get_request_diskset() -{ - return (_request_diskset); -} - -/* - * FUNCTION: void unset_request_diskset() - * - * PURPOSE: unset the module global diskset name. - */ -void -unset_request_diskset( - char *dsname) -{ - _request_diskset = NULL; -} - -/* - * FUNCTION: int set_toplevel_request(devconfig_t *) - * - * INPUT: devconfig_t * - pointer to the diskset request - * OUTPUT: 0 - success - * !0 - validation failure - * RETURNS: - * - * PURPOSE: set the module global toplevel request struct. - * this will be set within the only public entry - * point to the module -- get_layout() - * - * SIDEEFFECT: The devconfig_t's list of available and unavailable - * devices will be validated. - */ -int -set_toplevel_request( - devconfig_t *req) -{ - _toplevel_request = req; - - return (validate_request_avail_unavail(req)); -} - -/* - * FUNCTION: void unset_toplevel_request() - * - * PURPOSE: unset the layout module global toplevel request struct. - * - */ -void -unset_toplevel_request() -{ - _toplevel_request = NULL; -} - -/* - * FUNCTION: int set_defaults(devconfig_t *) - * - * INPUT: devconfig_t * - pointer to the global defaults devconfig_t - * OUTPUT: 0 - success - * !0 - validation failure - * RETURNS: - * - * PURPOSE: set the module global defaults struct. - * this will be set within the only public entry - * point to the module -- get_layout() - * - * SIDEEFFECT: The devconfig_t's list of available and unavailable - * devices will be validated. - */ -int -set_request_defaults( - defaults_t *defaults) -{ - int error = 0; - devconfig_t *diskset = NULL; - - _defaults = defaults; - - if ((error = defaults_get_diskset_by_name( - _defaults, get_request_diskset(), &diskset)) == 0) { - - error = validate_request_avail_unavail(diskset); - - } else if (error == ENOENT) { - /* no defaults to verify */ - error = 0; - } - - return (error); -} - -/* - * FUNCTION: void unset_request_defaults() - * - * PURPOSE: unset the layout module global defaults struct. - * - */ -void -unset_request_defaults() -{ - _defaults = NULL; -} - -/* - * FUNCTION: get_stripe_min_comp(devconfig_t *req, uint16_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint64_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the minimum of components - * for striped volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_stripe_min_comp( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_stripe_mincomp(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_stripe_mincomp( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_stripe_max_comp(devconfig_t *req, uint16_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint64_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the maximum number of components - * for striped volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_stripe_max_comp( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_stripe_maxcomp(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_stripe_maxcomp( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_stripe_interlace(devconfig_t *req, uint64_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint64_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the interlace value for striped - * volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - * - * If no value is explictly specified, ERR_ATTR_UNSET is - * returned. - */ -int -get_stripe_interlace( - devconfig_t *req, - uint64_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_stripe_interlace(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - error = 0; - } - - if (*val == 0) { - if ((error = defaults_get_stripe_interlace( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_mirror_read_strategy(devconfig_t *req, - * mirror_read_strategy_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a mirror_read_strategy_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the write strategy mirrored volumes - * should have for volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - * - * If no value is explictly specified, ERR_ATTR_UNSET is - * returned. - */ -int -get_mirror_read_strategy( - devconfig_t *req, - mirror_read_strategy_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_mirror_read(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_mirror_read( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_mirror_write_strategy(devconfig_t *req, - * mirror_write_strategy_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a mirror_write_strategy_t to hold result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the write strategy mirrored volumes - * should have for volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - * - * If no value is explictly specified, ERR_ATTR_UNSET is - * returned. - */ -int -get_mirror_write_strategy( - devconfig_t *req, - mirror_write_strategy_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_mirror_write(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_mirror_write( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_mirror_pass(devconfig_t *req, uint16_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint16_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the resync pass mirrored volumes - * should have for volumes satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - * - * If no value is explictly specified, ERR_ATTR_UNSET is - * returned. - */ -int -get_mirror_pass( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_mirror_pass(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_mirror_pass( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_mirror_nsubs(devconfig_t *req, uint16_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint16_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines how many submirrors mirrored - * volumes should have for volumes satisfying the input - * request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_mirror_nsubs( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_mirror_nsubs(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_mirror_nsubs( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_volume_faultrecov(devconfig_t *req, boolean_t *val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a boolean_t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines whether data redundant volumes - * should also have fault recovery (e.g., HSPs) for volumes - * satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_volume_faultrecov( - devconfig_t *req, - boolean_t *val) -{ - int error = 0; - - *val = B_FALSE; - - if ((error = devconfig_get_volume_usehsp(req, val)) != 0) { - if (error == ERR_ATTR_UNSET) { - component_type_t type = TYPE_UNKNOWN; - (void) devconfig_get_type(req, &type); - - switch (type) { - case TYPE_MIRROR: - error = defaults_get_mirror_usehsp( - _defaults, get_request_diskset(), val); - break; - - case TYPE_STRIPE: - error = defaults_get_stripe_usehsp( - _defaults, get_request_diskset(), val); - break; - - case TYPE_CONCAT: - error = defaults_get_concat_usehsp( - _defaults, get_request_diskset(), val); - break; - - case TYPE_VOLUME: - error = defaults_get_volume_usehsp( - _defaults, get_request_diskset(), val); - break; - } - } - } - - return (error); -} - -/* - * FUNCTION: get_volume_redundancy_level(devconfig_t *req, uint16_t val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint16-t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the appropriate level of data - * redundancy a volume should have for volumes satisfying - * the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_volume_redundancy_level( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_volume_redundancy_level(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_volume_redundancy_level( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_volume_npaths(devconfig_t *req, uint16_t val) - * INPUT: req - a devconfig_t pointer to the current request - * val - pointer to a uint16-t to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the appropriate level of datapath - * redundancy a slice component should have for volumes - * satisfying the input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_volume_npaths( - devconfig_t *req, - uint16_t *val) -{ - int error = 0; - - *val = 0; - - if ((error = devconfig_get_volume_npaths(req, val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - - if (*val == 0) { - if ((error = defaults_get_volume_npaths( - _defaults, get_request_diskset(), val)) != 0) { - if (error != ERR_ATTR_UNSET) { - return (error); - } - } - } - - return (error); -} - -/* - * FUNCTION: get_default_hsp_name(devconfig_t *req, char **hspname) - * INPUT: req - a devconfig_t pointer to the current request - * hspname - pointer to a char * to hold the result, if any - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which determines the default HSP name for the - * input request. - * - * The value to use is taken from the input request, the - * toplevel diskset request, the diskset defaults or the - * global defaults. - */ -int -get_default_hsp_name( - devconfig_t *req, - char **name) -{ - int error = 0; - - *name = NULL; - - if ((error = defaults_get_hsp_name(_defaults, - get_request_diskset(), name)) != 0) { - if (error != ENOENT) { - return (error); - } - error = 0; - } - - return (error); -} - -/* - * FUNCTION: slice_is_available(char *sname, devconfig_t *request, - * boolean_t bool) - * INPUT: sname - a slice name - * request - pointer to a devconfig_t struct representing - * the current layout request being processed - * bool - pointer to a boolean to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: Validation helper which determines if the named slice can - * be used as a volume component when satisfying the input - * request. - * - * Check if the slice appears in the known slice list, - * then check the request's available and unavailable - * device specifications. - */ -int -slice_is_available( - char *sname, - devconfig_t *request, - boolean_t *bool) -{ - dm_descriptor_t slice = (dm_descriptor_t)0; - int error = 0; - - *bool = B_FALSE; - - if ((error = slice_get_by_name(sname, &slice)) != 0) { - return (error); - } - - if (slice == (dm_descriptor_t)0) { - /* no slice found */ - return (ENODEV); - } - - if (error == 0) { - error = is_named_device_avail(request, sname, B_TRUE, bool); - } - - return (error); -} - -/* - * FUNCTION: get_disks_for_target(char *name, dlist_t **disks) - * - * INPUT: name - a char* device CTD name - * - * OUTPUT: disks - disks matching the input target name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Validation helper function which finds all disks "on" the - * input target. - * - * The input name is assumed to be a target name, cXtX, and - * the list of known disks is searched to find any disk that - * looks to be "on" that target. - * - * "On" is determined by comparing a disk's name and - * aliases to the target to see if they match. - */ -int -get_disks_for_target( - char *name, - dlist_t **disks) -{ - int error = 0; - device_spec_t *targetid = NULL; - - error = get_spec_for_name(name, &targetid); - if (error == 0) { - dlist_t *known_disks = NULL; - dlist_t *iter = NULL; - - get_known_disks(&known_disks); - for (iter = known_disks; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - device_spec_t *diskid = NULL; - char *diskname = NULL; - dlist_t *diskaliases = NULL; - dlist_t *item; - - ((error = get_display_name(disk, &diskname)) != 0) || - (error = get_aliases(disk, &diskaliases)) || - (error = get_spec_for_name(diskname, &diskid)); - - if (error == 0) { - if (spec_includes_device(targetid, diskid) == B_TRUE) { - /* add disk */ - if ((item = dlist_new_item((void *)(uintptr_t)disk)) == - NULL) { - error = ENOMEM; - } else { - *disks = dlist_append(item, *disks, AT_HEAD); - } - } else { - /* check disk's aliases */ - dlist_t *iter2; - for (iter2 = diskaliases; - (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - - char *aliasname = NULL; - device_spec_t *aliasid = NULL; - error = get_display_name(disk, &aliasname); - error = get_spec_for_name(aliasname, &aliasid); - - if (spec_includes_device( - targetid, aliasid) == B_TRUE) { - - /* alias matched, add disk */ - item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - *disks = - dlist_append(item, *disks, AT_HEAD); - } - } - } - } - } - } - } - - return (error); -} - -/* - * FUNCTION: select_hbas_with_n_disks(devconfig_t *request, - * dlist_t *hbas, int mindisks, dlist_t **selhbas, - * dlist_t **seldisks) - * - * INPUT: request - pointer to a devconfig_t struct representing - * the current layout request being processed - * hbas - pointer to a list of HBAs - * mindisks - minimum number of disks required on the HBAs - * - * OUTPUT: selhbas - pointer to a list containing the HBAs with at - * least mindisks available disks. - * seldisks - pointer to a list containing the available disks - * for the HBAs in selhbas - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which counts the number of available disks associated - * with each of the input HBAs and adds those that have at - * least mindisks to the output list. - * - * Only available disks that have available space are counted. - * - * Disks connected thru multiple HBAs are only counted for - * the first HBA they're accessed through. - * - * The list of HBAs returned will be in descending order, - * i.e., HBAs with more disks come before those with fewer. - * - * The returned lists of HBAs and disks must be passed to - * dlist_free_items() to recover the space allocated to hold - * each list item. - * - * for (each HBA) { - * - * select HBA - * get available disks on HBA - * - * for (each disk) { - * if (disk is not in selected disk list) - * add it to the list - * else - * count it as a distinct disk on this HBA - * } - * - * if (this HBA has >= mindisks distinct disks) - * add this HBA to the list of returned HBAs - * - * } - */ -int -select_hbas_with_n_disks( - devconfig_t *request, - dlist_t *hbas, - int mindisks, - dlist_t **selhbas, - dlist_t **seldisks) -{ - dlist_t *iter = NULL; - int error = 0; - - *selhbas = NULL; - *seldisks = NULL; - - /* for each input HBA */ - for (iter = hbas; (error == 0) && (iter != NULL); iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - dlist_t *iter2 = NULL; - dlist_t *disks = NULL; - uint64_t space = 0; - uint16_t ndistinct = 0; - - error = hba_get_avail_disks_and_space(request, hba, &disks, &space); - - /* for each of this HBA's disks */ - for (iter2 = disks; - (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - - dm_descriptor_t disk = (uintptr_t)iter2->obj; - - /* unique disk? has it been seen thru some other HBA? */ - if (dlist_contains(*seldisks, (void *)(uintptr_t)disk, - compare_descriptor_names) != B_TRUE) { - - /* distinct, add to list of all_distinct */ - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - - *seldisks = - dlist_append(item, *seldisks, AT_HEAD); - - /* increment this HBA's distinct disk count */ - ++ndistinct; - } - } - } - - if (ndistinct >= mindisks) { - - /* this HBA has minimum # of disks, add to output list */ - dlist_t *item = dlist_new_item((void *)(uintptr_t)hba); - if (item == NULL) { - error = ENOMEM; - } else { - *selhbas = - dlist_insert_ordered( - item, *selhbas, DESCENDING, - compare_hba_n_avail_disks); - - /* save # of disks for ordering the list */ - hba_set_n_avail_disks(hba, ndistinct); - } - } - - dlist_free_items(disks, NULL); - } - - if (error != 0) { - oprintf(OUTPUT_TERSE, - gettext("failed selecting HBAs with n disks: %d\n"), - error); - - dlist_free_items(*selhbas, NULL); - *selhbas = NULL; - dlist_free_items(*seldisks, NULL); - *seldisks = NULL; - } - - return (error); -} - -/* - * FUNCTION: hba_get_avail_disks_and_space(devconfig_t *request, - * dm_descriptor_t hba, dlist_t **disks, uint64_t *space) - * - * INPUT: request - pointer to a devconfig_t struct representing - * the current layout request being processed - * hba - dm_descriptor_t handle for an HBA - * - * OUTPUT: disks - pointer to a list to hold the computed available - * disks - * avail - pointer to a uint64_t to hold the aggregate - * available space on the available disks - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which examines the disks associated with the - * input HBA and assembles a list of those that are available. - * - * Available is defined as being in the usable list, having - * unused space and not specifically excluded by the request's - * list of unavailable devices. - * - * The returned list must be passed to dlist_free_items() - * to recover the memory allocated to hold each list item. - */ -int -hba_get_avail_disks_and_space( - devconfig_t *request, - dm_descriptor_t hba, - dlist_t **disks, - uint64_t *space) -{ - dlist_t *usable_disks = NULL; - dlist_t *iter = NULL; - int error = 0; - - *disks = NULL; - - /* for each usable disk */ - error = get_usable_disks(&usable_disks); - for (iter = usable_disks; - (error == 0) && (iter != NULL); - iter = iter->next) { - - dm_descriptor_t disk = (uintptr_t)iter->obj; - boolean_t avail = B_FALSE; - dlist_t *hbas = NULL; - - /* is disk attached to HBA in question? */ - error = disk_get_hbas(disk, &hbas); - if (error != 0) { - continue; - } - - if (dlist_contains(hbas, (void *)(uintptr_t)hba, - compare_descriptor_names) == B_TRUE) { - - /* is disk available? */ - error = is_device_avail(disk, request, &avail); - if ((error == 0) && (avail == B_TRUE)) { - uint64_t disk_space = 0; - - /* does disk have available space? */ - error = disk_get_avail_space(request, disk, &disk_space); - if ((error == 0) && (disk_space > 0)) { - - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - *disks = dlist_append(item, *disks, AT_HEAD); - } - - *space += disk_space; - } - } - } - - dlist_free_items(hbas, NULL); - } - - if (error != 0) { - dlist_free_items(*disks, NULL); - *disks = NULL; - } - - return (error); -} - -/* - * FUNCTION: disk_get_avail_space(devconfig_t *request, - * dlist_t *disks, uint64_t space) - * - * INPUT: request - pointer to a devconfig_t struct representing - * the current layout request being processed - * disks - pointer to a list of disks - * space - pointer to a uint64_t to hold the computed available - * space - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which iterates the input list of disks and determines - * the aggregate amount of available space they represent. - * - * Only disk slices that are in the usable slice list and not - * specifically excluded by the request's list of unavailable - * devices will contribute to the aggregate space computation. - */ -static int -disk_get_avail_space( - devconfig_t *request, - dm_descriptor_t disk, - uint64_t *space) -{ - dlist_t *usable_slices = NULL; - dlist_t *iter = NULL; - int error = 0; - - *space = 0; - - /* for each usable slice */ - error = get_usable_slices(&usable_slices); - for (iter = usable_slices; - (error == 0) && (iter != NULL); - iter = iter->next) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - dm_descriptor_t slice_disk; - boolean_t avail = B_FALSE; - boolean_t reserved = B_FALSE; - boolean_t used = B_FALSE; - - /* is slice on disk in question? */ - if (((error = slice_get_disk(slice, &slice_disk)) != 0) || - (compare_descriptor_names((void *)(uintptr_t)slice_disk, - (void *)(uintptr_t)disk) != 0)) { - continue; - } - - /* is slice reserved by an explicit layout request? */ - if (((error = is_reserved_slice(slice, &reserved)) != 0) || - (reserved == B_TRUE)) { - continue; - } - - /* is slice used by a pending layout request? */ - if (((error = is_used_slice(slice, &used)) != 0) || - (used == B_TRUE)) { - continue; - } - - /* is slice available? */ - if (((error = is_device_avail(slice, request, &avail)) == 0) && - (avail == B_TRUE)) { - - /* does slice have usable space? */ - uint64_t size = 0; - if ((error = slice_get_size(slice, &size)) == 0) { - *space += size; - } - } - } - - return (error); -} - -/* - * FUNCTION: disks_get_avail_slices(devconfig_t *request, - * dlist_t *disks, dlist_t **slices) - * - * INPUT: request - pointer to a devconfig_t struct representing - * the current layout request being processed - * disks - pointer to a list of disks - * slices - pointer to an output list of disks - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: helper which iterates the input list of disks and builds a - * new list which contains disks that are determined to be - * available for satisfying the input request. - * - * A disk must contain at least one slice in the available - * slice list as well as have available space in order - * to be available. - */ -int -disks_get_avail_slices( - devconfig_t *request, - dlist_t *disks, - dlist_t **slices) -{ - dlist_t *usable_slices = NULL; - dlist_t *iter = NULL; - int error = 0; - - *slices = NULL; - - /* for each usable slice */ - error = get_usable_slices(&usable_slices); - for (iter = usable_slices; - (error == 0) && (iter != NULL); - iter = iter->next) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - dm_descriptor_t disk = (dm_descriptor_t)0; - boolean_t avail = B_FALSE; - boolean_t reserved = B_FALSE; - boolean_t used = B_FALSE; - - /* is slice on a disk in the input list? */ - if (((error = slice_get_disk(slice, &disk)) != 0) || - (dlist_contains(disks, (void *)(uintptr_t)disk, - compare_descriptor_names) != B_TRUE)) { - continue; - } - - /* is slice reserved by an explicit layout request? */ - if (((error = is_reserved_slice(slice, &reserved)) != 0) || - (reserved == B_TRUE)) { - continue; - } - - /* is slice used by a pending layout request? */ - if (((error = is_used_slice(slice, &used)) != 0) || - (used == B_TRUE)) { - continue; - } - - /* is slice available? */ - if (((error = is_device_avail(slice, request, &avail)) == 0) && - (avail == B_TRUE)) { - - /* does slice have available space? */ - uint64_t size = 0; - error = slice_get_size(slice, &size); - if ((error == 0) && (size > 0)) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)slice); - if (item == NULL) { - error = ENOMEM; - } else { - *slices = dlist_append(item, *slices, AT_TAIL); - } - } - } - } - - if (error != 0) { - dlist_free_items(*slices, NULL); - *slices = NULL; - } - - return (error); -} - - -/* - * FUNCTION: get_hbas_and_disks_used_by_volumes(dlist_t *volumes, - * dlist_t **hbas, dlist_t **disks) - * - * INPUT: volumes - pointer to a list of devconfig_t volumes - * - * OUTPUT: hbas - a list of HBAs utilized by the input volumes - * disks - a list of disks utilized by the input volumes - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: An aggregate list of HBAs and disks used by the input volumes - * is built up by iterating the list of volumes and calling - * get_hbas_disks_used_by_volume() to determine the HBAs and disk - * used by each volume. - * - * The returned lists of HBAs and disks may contain duplicates. - */ -int -get_hbas_and_disks_used_by_volumes( - dlist_t *volumes, - dlist_t **hbas, - dlist_t **disks) -{ - dlist_t *iter = NULL; - int error = 0; - - for (iter = volumes; - (iter != NULL) && (error == 0); - iter = iter->next) { - error = get_hbas_and_disks_used_by_volume( - (devconfig_t *)iter->obj, hbas, disks); - } - - return (error); -} - -/* - * FUNCTION: get_hbas_and_disks_used_by_volume(devconfig_t *volume, - * dlist_t **hbas, dlist_t **disks) - * - * INPUT: volume - pointer to a devconfig_t volume - * - * OUTPUT: hbas - a list of HBAs updated to include those utilized - * by the input volume - * disks - a list of disks updated to inlclude those utilized - * by the input volume - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: The volume's components are iterated and the disks and HBAs - * for each component are determined and appended to the input - * lists of HBAs and disks. - * - * The returned lists of HBAs and disks may contain duplicates. - */ -int -get_hbas_and_disks_used_by_volume( - devconfig_t *volume, - dlist_t **hbas, - dlist_t **disks) -{ - dlist_t *iter = NULL; - int error = 0; - - for (iter = devconfig_get_components(volume); - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - if (devconfig_isA(dev, TYPE_SLICE)) { - - dm_descriptor_t disk = NULL; - char *name = NULL; - - /* get disk for component slice */ - ((error = devconfig_get_name(dev, &name)) != 0) || - (error = get_disk_for_named_slice(name, &disk)); - if (error == 0) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - if (item == NULL) { - error = ENOMEM; - } else { - *disks = dlist_append(item, *disks, AT_HEAD); - } - } - - /* get HBAs for disk */ - if (error == 0) { - dlist_t *disk_hbas = NULL; - if ((error = disk_get_hbas(disk, &disk_hbas)) == 0) { - /* the hba list may contain dups, but that's ok */ - *hbas = dlist_append(disk_hbas, *hbas, AT_HEAD); - } - } - - } else if (devconfig_isA(dev, TYPE_MIRROR)) { - - /* collect info for submirrors */ - dlist_t *iter1; - for (iter1 = devconfig_get_components(dev); - (iter1 != NULL) && (error == 0); - iter1 = iter1->next) { - error = get_hbas_and_disks_used_by_volume( - (devconfig_t *)iter1->obj, hbas, disks); - } - - } - } - - return (error); -} - -/* - * FUNCTION: compare_hba_n_avail_disks(void *obj1, void *obj2) - * - * INPUT: obj1 - opaque pointer - * obj2 - opaque pointer - * - * RETURNS: int - <0 - if obj1 has fewer available disks than obj2 - * 0 - if obj1 has the same # of available disks as obj2 - * >0 - if obj1 has more available disks than obj2 - * - * PURPOSE: dlist_t helper which compares the number of available disks - * for two HBAs represented as dm_descriptor_t handles. - * - * Both input objects are assumed to be dm_descriptor_t handles. - * - * The number of available disks associated with the HBAs was - * computed and saved in select_hbas_with_n_disks(), this - * function just checks the saved values. - */ -static int -compare_hba_n_avail_disks( - void *obj1, - void *obj2) -{ - uint16_t n1 = 0; - uint16_t n2 = 0; - - assert(obj1 != NULL); - assert(obj2 != NULL); - - (void) hba_get_n_avail_disks((uintptr_t)obj1, &n1); - (void) hba_get_n_avail_disks((uintptr_t)obj2, &n2); - - return ((int)n1 - n2); -} - -/* - * FUNCTION: is_device_avail(dm_descriptor_t desc, - * devconfig_t *request, boolean_t *avail) - * - * INPUT: desc - a dm_descriptor_t device handle - * request - pointer to a devconfig_t struct representing - * the current layout request being processed - * avail - pointer to a boolean to hold the result - * - * RETURNS: int - 0 - on success - * !0 - otherwise - * - * PURPOSE: Internal helper which determines if the input device can - * be used as a volume component when satisfying the input - * request. - * - * The device is assumed to be a known valid device. - * - * The function checks if the device passes the request's - * available and unavailable device specifications. - * - * The input device name may be either a DID name or a CTD - * name. All name comparisons are done using the CTD name. - */ -static int -is_device_avail( - dm_descriptor_t desc, - devconfig_t *request, - boolean_t *avail) -{ - char *name = NULL; - int error = 0; - - *avail = B_FALSE; - - if ((error = get_display_name(desc, &name)) == 0) { - error = is_named_device_avail(request, name, B_TRUE, avail); - } - - return (error); -} - -/* - * FUNCTION: compare_request_to_request_spec_list_request( - * void *request, void *list_item) - * - * INPUT: request - opaque pointer to a devconfig_t - * list_item - opaque pointer to a request_spec_list_t - * - * RETURNS: int - 0 - if request is the same as list_item->request - * !0 - otherwise - * - * PURPOSE: dlist_t helper which compares the input request pointer - * to the list_item's request pointer for equality. - * - * This function is the lookup mechanism for the lists of - * cached device_spec_ts representing available/unavailable - * devices for a given defaults_t request/defaults struct. - * - * The defaults_t struct pointer is the lookup key. - */ -static int -compare_request_to_request_spec_list_request( - void *request, - void *list_item) -{ - request_spec_list_t *entry = - (request_spec_list_t *)list_item; - - assert(request != NULL); - assert(entry != NULL); - - /* compare two devconfig_t pointers, if identical, return 0 */ - return ((devconfig_t *)request != entry->request); -} - -/* - * FUNCTION: compare_device_spec_specificity(void *spec1, void *spec2) - * - * INPUT: spec1 - opaque pointer to a device_spec_t - * spec2 - opaque pointer to a device_spec_t - * - * RETURNS: int - <0 - if spec1 is less specific than spec2 - * 0 - if spec1 is as specific than spec2 - * >0 - if spec1 is more specific than spec2 - * - * PURPOSE: dlist_t helper which compares the level of specificity - * in the two input device_spec_t structs. The one - * which specifies more "components" of a cXtXdXsX device - * name is considered more specific. - */ -static int -compare_device_spec_specificity( - void *spec1, - void *spec2) -{ - if (spec1 == NULL || spec2 == NULL) { - return (-1); - } - - if ((((device_spec_t *)spec1)->data.ctd->slice != ID_UNSPECIFIED) && - (((device_spec_t *)spec2)->data.ctd->slice == ID_UNSPECIFIED)) { - /* spec1 has slice, spec2 does not, spec1 more specific */ - return (1); - } - - if ((((device_spec_t *)spec2)->data.ctd->slice != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->slice == ID_UNSPECIFIED)) { - /* spec2 has slice, spec1 does not, spec2 more specific */ - return (-1); - } - - if ((((device_spec_t *)spec2)->data.ctd->slice != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->slice != ID_UNSPECIFIED)) { - /* both spec1 and spec2 have slice */ - return (0); - } - - if ((((device_spec_t *)spec1)->data.ctd->lun != ID_UNSPECIFIED) && - (((device_spec_t *)spec2)->data.ctd->lun == ID_UNSPECIFIED)) { - /* spec1 has lun, spec2 does not, spec1 more specific */ - return (1); - } - - if ((((device_spec_t *)spec2)->data.ctd->lun != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->lun == ID_UNSPECIFIED)) { - /* spec2 has lun, spec1 does not, spec2 more specific */ - return (-1); - } - - if ((((device_spec_t *)spec2)->data.ctd->lun != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->lun != ID_UNSPECIFIED)) { - /* both spec1 and spec2 have lun */ - return (0); - } - - if ((((device_spec_t *)spec1)->data.ctd->target != ID_UNSPECIFIED) && - (((device_spec_t *)spec2)->data.ctd->target == ID_UNSPECIFIED)) { - /* spec1 has target, spec2 does not, spec1 more specific */ - return (1); - } - - if ((((device_spec_t *)spec2)->data.ctd->target != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->target == ID_UNSPECIFIED)) { - /* spec2 has target, spec1 does not, spec2 more specific */ - return (-1); - } - - if ((((device_spec_t *)spec2)->data.ctd->target != ID_UNSPECIFIED) && - (((device_spec_t *)spec1)->data.ctd->target != ID_UNSPECIFIED)) { - /* both spec1 and spec2 have target */ - return (0); - } - - /* both specify just ctrl */ - return (0); -} - -/* - * FUNCTION: find_request_spec_list_entry(devconfig_t *request) - * - * INPUT: request - pointer to a devconfig_t struct - * - * RETURNS: request_spec_list_entry - pointer to a - * request_spec_list_entry struct - * - * PURPOSE: Lookup function which encapsulates the details of locating - * the device_spec_list_t cache entry for the input request. - */ -static request_spec_list_t * -find_request_spec_list_entry( - devconfig_t *request) -{ - dlist_t *list_item = NULL; - request_spec_list_t *entry = NULL; - - list_item = dlist_find( - _request_spec_list_cache, - (void *)request, - compare_request_to_request_spec_list_request); - - if (list_item != NULL) { - entry = (request_spec_list_t *)list_item->obj; - } - - return (entry); -} - -/* - * FUNCTION: add_request_spec_list_entry(devconfig_t *request, - * char **avail_device_specs, char **unavail_device_specs, - * request_spec_list_entry_t **entry) - * - * INPUT: entry - pointer to the request_spec_list_entry struct to be - * added to the cache. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which encapsulates the details of adding a - * device_spec_list_t cache entry. - */ -static int -add_request_spec_list_entry( - request_spec_list_t *entry) -{ - dlist_t *list_item = dlist_new_item((void *)entry); - - if (list_item == NULL) { - return (ENOMEM); - } - - _request_spec_list_cache = dlist_append(list_item, - _request_spec_list_cache, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: make_request_spec_list_entry(devconfig_t *request, - * char **avail_device_specs, char **unavail_device_specs, - * request_spec_list_entry_t **entry) - * - * INPUT: request - pointer to a devconfig_t struct - * avail_device_specs - char * array of user specified available - * devices associated with the input request - * unavail_device_specs - char * array of user specified - * unavailable devices associated with the input - * request - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which encapsulates the details of generating a new - * the device_spec_list_t cache entry for the input request - * and its lists of avail/unavail devices. - * - * Converts the input arrays of (un)available device names into - * equivalent lists of device_spec_t structs. - * - * Creates a new cache entry, populates it and adds it to the - * cache. - */ -static int -make_request_spec_list_entry( - devconfig_t *request, - char **avail_device_specs, - char **unavail_device_specs, - request_spec_list_t **entry) -{ - int error = 0; - dlist_t *list = NULL; - - *entry = calloc(1, sizeof (request_spec_list_t)); - if (*entry == NULL) { - return (ENOMEM); - } - - (*entry)->request = request; - - /* - * map the avail_device_name array into a list of device_spec_t - * and save the list as the entry's available list - */ - error = convert_usernames_to_specs( - avail_device_specs, &list); - - if (error == 0) { - (*entry)->avail_specs_list = list; - } - - /* - * map the unavail_device_name array into a list of device_spec_t - * and save the list as the entry's unavailable list - */ - list = NULL; - error = convert_usernames_to_specs( - unavail_device_specs, &list); - - if (error == 0) { - (*entry)->unavail_specs_list = list; - } - - if (error != 0) { - /* delete the partial entry */ - destroy_request_spec_list_entry((void *)*entry); - *entry = NULL; - } - - return (error); -} - -/* - * FUNCTION: convert_usernames_to_specs(char **specs, dlist_t **list) - * - * INPUT: specs - char * array of device CTD names - * - * OUTPUT: list - pointer to a list of device_spec_t corresponding - * to each name in the input array - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which converts the input CTD device names to the - * equivalent device_spec_t structs. - * - * Iterates the input array and converts each CTD name to a - * device_spec_t using get_spec_for_name(). - */ -static int -convert_usernames_to_specs( - char **specs, - dlist_t **list) -{ - int i = 0; - int error = 0; - - /* - * For each spec in the array, get the corresponding - * device_spec_t and add it to the list. - * - * Any spec in the array that looks to be a DID name - * is first converted to its equivalent CTD name. - */ - for (i = 0; - (specs != NULL) && (specs[i] != NULL) && (error == 0); - i++) { - - device_spec_t *spec = NULL; - char *userspec = specs[i]; - - error = get_spec_for_name(userspec, &spec); - if ((error == 0) && (spec != NULL)) { - dlist_t *list_item = dlist_new_item((void *)spec); - if (spec == NULL) { - error = ENOMEM; - } else { - *list = dlist_insert_ordered - (list_item, *list, DESCENDING, - compare_device_spec_specificity); - } - } - } - - if (error != 0) { - /* the device_spec_t in the list items are maintained */ - /* in a cache elsewhere, so don't free them here. */ - dlist_free_items(*list, NULL); - *list = NULL; - } - - return (error); -} - -/* - * FUNCTION: destroy_request_spec_list_entry(void *entry) - * - * INPUT: entry - opaque pointer to a request_spec_list_t - * - * RETURNS: nothing - * - * PURPOSE: Function which reclaims memory allocated to a - * request_spec_list_t. - * - * Frees memory allocated to the avail_spec_list and - * unavail_spec_list. Entries in the list are not freed, - * since they are owned by the device_spec cache. - */ -static void -destroy_request_spec_list_entry( - void *obj) -{ - request_spec_list_t *entry = (request_spec_list_t *)obj; - - if (entry != NULL) { - /* items in the list are in the spec_cache and will */ - /* be cleaned up when it is destroyed. */ - dlist_free_items(entry->avail_specs_list, NULL); - dlist_free_items(entry->unavail_specs_list, NULL); - free(entry); - } -} - -/* - * FUNCTION: destroy_request_spec_list_cache() - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which destroys all entries in the request_spec_list - * cache. - */ -static int -destroy_request_spec_list_cache() -{ - dlist_free_items(_request_spec_list_cache, - destroy_request_spec_list_entry); - _request_spec_list_cache = NULL; - - return (0); -} - -/* - * FUNCTION: get_request_avail_spec_list(devconfig_t *request, - * dlist_t **list) - * - * INPUT: request - a pointer to a devconfig_t - * - * OUTPUT: list - pointer to a list of device_spec_t corresponding - * to the devices specified as available by the - * input request. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which locates or builds the list of device_spec_t - * for the available devices specified in the input request. - * - * Looks up the input request in the request_spec_list cache. - * If there is currently no entry in the cache for the request, - * an entry is built and added. - * - * The entry's list of available device_spec_t is returned. - */ -static int -get_request_avail_spec_list( - devconfig_t *request, - dlist_t **list) -{ - request_spec_list_t *entry = NULL; - int error = 0; - - if ((entry = find_request_spec_list_entry(request)) == NULL) { - - /* create cache entry for this request */ - error = make_request_spec_list_entry( - request, - devconfig_get_available(request), - devconfig_get_unavailable(request), - &entry); - - if ((error == 0) && (entry != NULL)) { - if ((error = add_request_spec_list_entry(entry)) != 0) { - destroy_request_spec_list_entry(entry); - entry = NULL; - } - } - } - - if ((error == 0) && (entry != NULL)) { - *list = entry->avail_specs_list; - } - - return (error); -} - -/* - * FUNCTION: get_request_unavail_spec_list(devconfig_t *request, - * dlist_t **list) - * - * INPUT: request - a pointer to a devconfig_t - * - * OUTPUT: list - pointer to a list of device_spec_t corresponding - * to the devices specified as unavailable by the - * input request. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which locates or builds the list of device_spec_t - * for the unavailable devices specified in the input request. - * - * Looks up the input request in the request_spec_list cache. - * If there is currently no entry in the cache for the request, - * an entry is built and added. - * - * The entry's list of unavailable device_spec_t is returned. - */ -static int -get_request_unavail_spec_list( - devconfig_t *request, - dlist_t **list) -{ - request_spec_list_t *entry = NULL; - int error = 0; - - if ((entry = find_request_spec_list_entry(request)) == NULL) { - - /* create new entry for this request */ - error = make_request_spec_list_entry( - request, - devconfig_get_available(request), - devconfig_get_unavailable(request), - &entry); - - if ((error == 0) && (entry != NULL)) { - if ((error = add_request_spec_list_entry(entry)) != 0) { - destroy_request_spec_list_entry(entry); - entry = NULL; - } - } - } - - if ((error == 0) && (entry != NULL)) { - *list = entry->unavail_specs_list; - } - - return (error); -} - -/* - * FUNCTION: get_default_avail_spec_list(defaults_t *defaults, - * char *dsname, dlist_t **list) - * - * INPUT: defaults - a pointer to a defaults_t struct - * dsname - the name of the diskset whose defaults should be used - * - * OUTPUT: list - pointer to a list of device_spec_t corresponding - * to the devices specified as available by the - * defaults for the named diskset, or the global - * defaults for all disksets. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which locates or builds the list of device_spec_t - * for the available devices for the named diskset. - * - * Locates the defaults for the named diskset, if there are none, - * locates the global defaults for all disksets. - * - * The defaults devconfig_t struct is then used to look up the - * the corresponding entry in the request_spec_list cache. - * - * If there is currently no entry in the cache for the defaults, - * an entry is built and added. - * - * The entry's list of available device_spec_t is returned. - */ -static int -get_default_avail_spec_list( - defaults_t *alldefaults, - char *dsname, - dlist_t **list) -{ - request_spec_list_t *entry = NULL; - devconfig_t *defaults = NULL; - int error = 0; - - /* Get diskset defaults, or global if none for diskset */ - error = defaults_get_diskset_by_name( - alldefaults, dsname, &defaults); - - if (error != 0) { - if (error == ENOENT) { - /* to get global defaults, pass a NULL diskset name */ - error = defaults_get_diskset_by_name( - alldefaults, NULL, &defaults); - } - - if (error != 0) { - if (error != ENOENT) { - oprintf(OUTPUT_DEBUG, - gettext("get defaults for %s returned %d\n"), - dsname, error); - } else { - error = 0; - } - } - } - - if ((entry = find_request_spec_list_entry(defaults)) == NULL) { - - /* create new entry for these defaults */ - error = make_request_spec_list_entry( - defaults, - devconfig_get_available(defaults), - devconfig_get_unavailable(defaults), - &entry); - - if ((error == 0) && (entry != NULL)) { - if ((error = add_request_spec_list_entry(entry)) != 0) { - destroy_request_spec_list_entry(entry); - entry = NULL; - } - } - } - - if ((error == 0) && (entry != NULL)) { - *list = entry->avail_specs_list; - } - - return (error); -} - -/* - * FUNCTION: get_default_unavail_spec_list(defaults_t *defaults, - * char *dsname, dlist_t **list) - * - * INPUT: defaults - a pointer to a defaults_t struct - * dsname - the name of the diskset whose defaults should be used - * - * OUTPUT: list - pointer to a list of device_spec_t corresponding - * to the devices specified as unavailable by the - * defaults for the named diskset, or the global - * defaults for all disksets. - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which locates or builds the list of device_spec_t - * for the unavailable devices for the named diskset. - * - * Locates the defaults for the named diskset, if there are none, - * locates the global defaults for all disksets. - * - * The defaults devconfig_t struct is then used to look up the - * the corresponding entry in the request_spec_list cache. - * - * If there is currently no entry in the cache for the defaults, - * an entry is built and added. - * - * The entry's list of unavailable device_spec_t is returned. - */ -static int -get_default_unavail_spec_list( - defaults_t *alldefaults, - char *dsname, - dlist_t **list) -{ - request_spec_list_t *entry = NULL; - devconfig_t *defaults = NULL; - int error = 0; - - /* Get diskset defaults, or global if none for diskset */ - error = defaults_get_diskset_by_name( - alldefaults, dsname, &defaults); - - if (error != 0) { - - if (error == ENOENT) { - /* to get global defaults, pass a NULL diskset name */ - error = defaults_get_diskset_by_name( - alldefaults, NULL, &defaults); - } - - if (error != 0) { - if (error != ENOENT) { - oprintf(OUTPUT_DEBUG, - gettext("get defaults for %s returned %d\n"), - dsname, error); - } else { - error = 0; - } - } - } - - if ((entry = find_request_spec_list_entry(defaults)) == NULL) { - - /* create new entry for these defaults */ - error = make_request_spec_list_entry( - defaults, - devconfig_get_available(defaults), - devconfig_get_unavailable(defaults), - &entry); - - if ((error == 0) && (entry != NULL)) { - if ((error = add_request_spec_list_entry(entry)) != 0) { - destroy_request_spec_list_entry(entry); - entry = NULL; - } - } - } - - if ((error == 0) && (entry != NULL)) { - *list = entry->unavail_specs_list; - } - - return (error); -} - -/* - * FUNCTION: is_named_device_avail(devconfig_t *request, char *device_name, - * boolean_t check_aliases, boolean_t *avail) - * - * INPUT: request - the current request devconfig_t - * device_name - char * device name - * check_aliases - boolean_t which indicates whether the device's - * aliases should be considered by the availability checks. - * - * OUTPUT: avail - a boolean_t * to hold the result - * - * RETURNS: int - !0 on error - * - * avail is set to B_TRUE if the named device is available for - * the input request, B_FALSE otherwise. - * - * PURPOSE: Determine if the named device can be used to satisfy the - * input request. - * - * There are several levels at which device availabiity or - * unavailability may be specifed: - * - * 1. the volume subrequest, - * 2. the toplevel (diskset) request, - * 3. the diskset-specific defaults - * 4. the global defaults - * - * If the diskset-specific defaults exist, only they are checked. - * - * The precedence ordering that is enforced: - * - * 1. if request has an avail list, the name must be in it - * and not in the request's unavail list. - * 2. if request has an unavail list, the name must not be in it. - * 3. if toplevel request has an avail list, the name must be - * in it and not in the toplevel request's unavailable - * list. - * 4. if toplevel request has an unavail list, the name must - * not be in it. - * 5. if defaults have an avail list, the name must be in it - * and not in the defaults unavailable list. - * 6. if defaults have an unavail list, the name must not be - * in it. - */ -static int -is_named_device_avail( - devconfig_t *request, - char *device_name, - boolean_t check_aliases, - boolean_t *avail) -{ - typedef enum check_types { - DEVICE_REQUEST = 0, - DISKSET_REQUEST, - DEFAULTS, - N_CHECKS - } check_type_t; - - check_type_t check_type; - - typedef enum list_types { - AVAIL = 0, - UNAVAIL, - N_LISTS - } list_type_t; - - dlist_t *lists[N_CHECKS][N_LISTS]; - boolean_t includes; - int error = 0; - - memset(lists, 0, (N_CHECKS * N_LISTS) * sizeof (dlist_t *)); - - if (request != NULL) { - /* get avail/unavail specs for request */ - ((error = get_request_avail_spec_list( - request, &lists[DEVICE_REQUEST][AVAIL])) != 0) || - (error = get_request_unavail_spec_list( - request, &lists[DEVICE_REQUEST][UNAVAIL])); - } - - if ((error == 0) && (_toplevel_request != NULL)) { - /* diskset request */ - ((error = get_request_avail_spec_list( - _toplevel_request, &lists[DISKSET_REQUEST][AVAIL])) != 0) || - (error = get_request_unavail_spec_list( - _toplevel_request, &lists[DISKSET_REQUEST][UNAVAIL])); - } - - if ((error == 0) && (_defaults != NULL)) { - /* and diskset/global defaults */ - ((error = get_default_avail_spec_list(_defaults, - get_request_diskset(), &lists[DEFAULTS][AVAIL])) != 0) || - (error = get_default_unavail_spec_list(_defaults, - get_request_diskset(), &lists[DEFAULTS][UNAVAIL])); - } - - if (error != 0) { - return (error); - } - - *avail = B_TRUE; - - for (check_type = DEVICE_REQUEST; - (check_type < N_CHECKS) && (error == 0); - check_type++) { - - if (lists[check_type][AVAIL] != NULL) { - - /* does avail spec list include named device? */ - if ((error = avail_list_includes_device_name( - lists[check_type][AVAIL], device_name, check_aliases, - &includes)) == 0) { - - if (includes != B_TRUE) { - *avail = B_FALSE; - } - - if ((includes == B_TRUE) && - (lists[check_type][UNAVAIL] != NULL)) { - - /* device is available, is it in the unavail list? */ - if ((error = unavail_list_includes_device_name( - lists[check_type][UNAVAIL], device_name, - check_aliases, &includes)) == 0) { - - if (includes == B_TRUE) { - *avail = B_FALSE; - } - } - } - } - - /* lists at this level checked, skip remainder */ - break; - - } else if (lists[check_type][UNAVAIL] != NULL) { - - /* does unavail spec list include named device? */ - if ((error = unavail_list_includes_device_name( - lists[check_type][UNAVAIL], device_name, - check_aliases, &includes)) == 0) { - - if (includes == B_TRUE) { - *avail = B_FALSE; - } - } - - /* list at this level checked, skip remainder */ - break; - } - } - - return (error); -} - -/* - * FUNCTION: avail_list_includes_device_name(dlist_t *list, - * char *device_name, boolean_t check_aliases, - * boolean_t *includes) - * - * INPUT: list - a dlist_t list of available device_spec_t - * device_name - a char * device CTD name - * check_aliases - boolean_t which indicates if the device's - * aliases should be considered in the availability - * checking. - * - * OUTPUT: includes - B_TRUE - if named device is "included" by any - * specification in the input list - * B_FALSE - otherwise - * - * RETURNS: int - 0 on success - * - !0 otherwise - * - * PURPOSE: Helper used by is_named_device_avail that determines - * if the input list of device specifications "includes" - * a specific device. - * - * Iterates the elements of the input array and searches - * for a match using spec_includes_device_name(). - */ -static int -avail_list_includes_device_name( - dlist_t *list, - char *device_name, - boolean_t check_aliases, - boolean_t *includes) -{ - dlist_t *iter = NULL; - int error = 0; - - *includes = B_FALSE; - - for (iter = list; - (*includes == B_FALSE) && (iter != NULL) && (error == 0); - iter = iter->next) { - - device_spec_t *spec = (device_spec_t *)iter->obj; - error = spec_includes_device_name(spec, device_name, - check_aliases, includes); - } - - return (0); -} - -/* - * FUNCTION: unavail_list_includes_device_name(dlist_t *list, - * char *device_name, boolean_t check_aliases, - * boolean_t *includes) - * - * INPUT: list - a dlist_t list of unavailable device_spec_t - * device_name - a char * device CTD name - * check_aliases - boolean_t which indicates if the device's - * aliases should be considered in the availability - * checking. - * - * OUTPUT: includes - B_TRUE - if named device is "included" by any - * specification in the input list - * B_FALSE - otherwise - * - * RETURNS: int - 0 on success - * - !0 otherwise - * - * PURPOSE: Helper used by is_named_device_avail that determines - * if the input list of device specifications "includes" - * a specific device. - * - * Iterates the elements of the input array and searches - * for a match using spec_includes_device_name_or_alias(). - */ -static int -unavail_list_includes_device_name( - dlist_t *list, - char *device_name, - boolean_t check_aliases, - boolean_t *includes) -{ - dlist_t *iter = NULL; - int error = 0; - device_spec_t *unavail_spec; - boolean_t check_for_alternate_hba = B_FALSE; - - *includes = B_FALSE; - - /* - * the specs in the list are in descending order of specificity. - * so a more exact spec will rule the device out before a less - * exact spec. - * - * Meaning: if the list has { "c3t0d0", ..., "c3", ... } and the - * input device name is "c3t0d0s0", it will match "c3t0d0" - * before "c3". - * - * This is important for the multi-path alias checking below. - * If the input device name is ruled out by a non-controller - * specification, it is really unavailable. - */ - for (iter = list; - (*includes == B_FALSE) && (iter != NULL); - iter = iter->next) { - - unavail_spec = (device_spec_t *)iter->obj; - error = spec_includes_device_name( - unavail_spec, device_name, check_aliases, includes); - - } - - if ((error == 0) && (*includes == B_TRUE)) { - - /* matched an unavailable spec, was it a controller/HBA? */ - oprintf(OUTPUT_DEBUG, - "device \"%s\" is unavailable, " - "it matched \"c(%d)t(%d)d(%d)s(%d)\"\n", - device_name, - unavail_spec->data.ctd->ctrl, - unavail_spec->data.ctd->target, - unavail_spec->data.ctd->lun, - unavail_spec->data.ctd->slice); - - if ((unavail_spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (unavail_spec->data.ctd->target == ID_UNSPECIFIED) && - (unavail_spec->data.ctd->lun == ID_UNSPECIFIED) && - (unavail_spec->data.ctd->slice == ID_UNSPECIFIED)) { - - /* - * Need to see if the named device is a disk or slice, - * and if so check to see if the it is multipathed - * and possibly accessible thru another controller/HBA. - */ - check_for_alternate_hba = B_TRUE; - } - } - - if ((error == 0) && (check_for_alternate_hba == B_TRUE)) { - - dm_descriptor_t slice = (dm_descriptor_t)0; - dm_descriptor_t disk = (dm_descriptor_t)0; - - ((error = slice_get_by_name(device_name, &slice)) != 0) || - (error = disk_get_by_name(device_name, &disk)); - if (error != 0) { - return (error); - } - - /* if it is a slice, get its disk */ - if ((error == 0) && (slice != (dm_descriptor_t)0)) { - error = slice_get_disk(slice, &disk); - } - - if ((error == 0) && (disk != (dm_descriptor_t)0)) { - - /* see if all the disk's HBAs are unavailable */ - dlist_t *hbas = NULL; - dlist_t *iter = NULL; - - error = disk_get_hbas(disk, &hbas); - - if (hbas != NULL) { - oprintf(OUTPUT_DEBUG, - gettext(" checking alternate paths for %s\n"), - device_name); - } else { - oprintf(OUTPUT_DEBUG, - gettext(" no alternate paths for %s\n"), - device_name); - } - - /* for each of the disk's HBAs */ - for (iter = hbas; - (iter != NULL) && (*includes == B_TRUE) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - device_spec_t *hbaspec; - char *hbaname = NULL; - dlist_t *iter2 = NULL; - - *includes = B_FALSE; - - ((error = get_display_name(hba, &hbaname)) != 0) || - (error = get_spec_for_name(hbaname, &hbaspec)); - - /* is HBA unavailable? */ - for (iter2 = list; - (iter2 != NULL) && (error == 0) && - (*includes == B_FALSE); - iter2 = iter2->next) { - - device_spec_t *spec = - (device_spec_t *)iter2->obj; - - *includes = spec_includes_device(spec, hbaspec); - } - } - dlist_free_items(hbas, NULL); - - /* if *includes==B_TRUE here, all HBAs are unavailable */ - } - } - - return (error); -} - -/* - * FUNCTION: spec_includes_device_name(device_spec_t *spec, - * char *device_name, boolean_t check_aliases, - * boolean_t *includes) - * - * INPUT: spec - a device_spec_t CTD specification. - * device_name - a char * device CTD name - * check_aliases - boolean_t which indicates if the device's - * aliases should be considered in the checking. - * - * OUTPUT: includes - B_TRUE - if device is "included" by the input - * specification - * B_FALSE - otherwise - * - * RETURNS: int - 0 on success - * - !0 otherwise - * - * PURPOSE: Helper used by (un)avail_specs_includes_device_name() that - * determines if the input device specification "includes" - * the named device. - * - * If check_aliases is true and the named device is a slice or - * a disk drive, its multi-pathed aliases are also checked - * against the spec. - */ -static int -spec_includes_device_name( - device_spec_t *spec, - char *device_name, - boolean_t check_aliases, - boolean_t *includes) -{ - device_spec_t *device_spec; - int error = 0; - - error = get_spec_for_name(device_name, &device_spec); - if (error == 0) { - - *includes = spec_includes_device(spec, device_spec); - - if ((*includes == B_FALSE) && (check_aliases == B_TRUE)) { - - /* spec doesn't include name, check aliases */ - - dm_descriptor_t device = (dm_descriptor_t)0; - dlist_t *aliases = NULL; - - /* only slices and disks have aliases */ - error = slice_get_by_name(device_name, &device); - if (device != (dm_descriptor_t)0) { - error = get_aliases(device, &aliases); - } else if (error == 0) { - error = disk_get_by_name(device_name, &device); - if (device != (dm_descriptor_t)0) { - error = get_aliases(device, &aliases); - } - } - - if ((error == 0) && (aliases != NULL)) { - - dlist_t *iter; - for (iter = aliases; - (iter != NULL) && (*includes == B_FALSE) && - (error == 0); - iter = iter->next) { - - char *alias = (char *)iter->obj; - device_spec_t *alias_spec; - - error = get_spec_for_name(alias, &alias_spec); - if (error == 0) { - /* does spec include alias? */ - *includes = spec_includes_device(spec, alias_spec); - } - } - } - dlist_free_items(aliases, free); - } - } - - return (error); -} - -/* - * FUNCTION: destroy_device_spec(device_spec_t *spec) - * - * INPUT: spec - pointer to a device_spec_t - * - * RETURNS: nothing - * - * PURPOSE: Function which reclaims memory allocated to a device_spec_t. - * - * Frees memory allocated to hold the specific data in the spec. - */ -static void -destroy_device_spec( - device_spec_t *spec) -{ - if (spec != NULL) { - if (spec->type == SPEC_TYPE_CTD) { - free(spec->data.ctd); - } else if (spec->type == SPEC_TYPE_RAW) { - free(spec->data.raw); - } - free(spec); - } -} - -/* - * FUNCTION: create_device_spec(char *name, device_spec_t **spec); - * - * INPUT: name - pointer to a char* device name - * - * OUTPUT: spec - pointer to a device_spec_t to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Function which creates a device_spec_t for the input - * device name. - * - */ -static int -create_device_spec( - char *name, - device_spec_t **spec) -{ - int error = 0; - - /* allocate the device spec and try various parsing schemes */ - *spec = (device_spec_t *)calloc(1, sizeof (device_spec_t)); - if (*spec == NULL) { - error = ENOMEM; - } else { - if (((error = create_device_ctd_spec(name, spec)) != 0) && - (error != ENOMEM)) { - /* CTD failed, try other parsing schemes */ - error = create_device_raw_spec(name, spec); - } - } - - return (error); -} - -/* - * FUNCTION: create_device_ctd_spec(char *name, device_spec_t **spec); - * - * INPUT: name - pointer to a char* device name - * - * OUTPUT: spec - pointer to a device_spec_t updated with the parsed - * CTD spec, if successful - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Function which atttempts to parse the input device name into - * cXtXdXsX component ids. The ids are the integer values of each - * specified segment of the input name. - * - * If the name doesn't contain a segment, the id is set to - * ID_UNSPECIFIED. - * - * The input name must be well-formed. - * - * These are the acceptable forms: - * - * cXtXdXsX - * cXtXdX - * cXtX - * cXdXsX - * cXdX - * cX - */ -static int -create_device_ctd_spec( - char *name, - device_spec_t **spec) -{ - uint_t ctrl; - uint_t target; - uint_t lun; - uint_t slice; - - uint_t nscan; - uint_t nchars; - - char *device_str; - char *target_str; - char *ctd_str; - char *t_ptr; - char *d_ptr; - char *s_ptr; - - boolean_t is_ide = B_FALSE; - boolean_t got_slice = B_FALSE; - boolean_t got_lun = B_FALSE; - boolean_t got_target = B_FALSE; - boolean_t got_ctrl = B_FALSE; - - int error = 0; - - ctd_str = strdup(name); - if (ctd_str == NULL) { - return (ENOMEM); - } - - /* trim any leading path (/dev/dsk/cXtXdXsX) */ - if ((device_str = strrchr(ctd_str, '/')) != NULL) { - ++device_str; - } else { - device_str = ctd_str; - } - - /* find each segment start position */ - t_ptr = strrchr(device_str, 't'); - d_ptr = strrchr(device_str, 'd'); - s_ptr = strrchr(device_str, 's'); - - /* - * scan ids from each existing segment working backwards - * so as to leave the device_str in the correct state - * for the next expected segment - */ - if (s_ptr != NULL) { - - /* found 's', try to get slice */ - nchars = strlen(s_ptr); - if ((sscanf(s_ptr, "s%u%n", &slice, &nscan) != 1) || - (nscan != nchars)) { - - error = -1; - oprintf(OUTPUT_DEBUG, - gettext("no slice component in device " - "name \"%s\".\n"), - name); - - } else { - got_slice = B_TRUE; - *s_ptr = '\0'; - } - } - - if ((error == 0) && (d_ptr != NULL)) { - - /* found 'd', try to get disk/lun */ - nchars = strlen(d_ptr); - if ((sscanf(d_ptr, "d%u%n", &lun, &nscan) != 1) || - (nscan != nchars)) { - - error = -1; - oprintf(OUTPUT_DEBUG, - gettext("no disk/lun component " - "in device name \"%s\".\n"), - name); - - } else { - got_lun = B_TRUE; - *d_ptr = '\0'; - } - } - - if ((error == 0) && (t_ptr != NULL)) { - - /* found 't', try to get target, it may be a hex WWN id */ - - /* skip leading 't' and add two for the 'OX' */ - nchars = strlen(t_ptr + 1) + 2; - if ((target_str = (char *)malloc(nchars+1)) == NULL) { - - error = ENOMEM; - - } else { - - strcpy(target_str, "0X"); - strcpy(target_str+2, t_ptr + 1); - target_str[nchars] = '\0'; - - if ((sscanf(target_str, "%x%n", &target, &nscan) != 1) || - (nscan != nchars)) { - - error = -1; - oprintf(OUTPUT_DEBUG, - gettext("no target/WWN component " - "in device name \"%s\".\n"), - name); - - } else { - got_target = B_TRUE; - *t_ptr = '\0'; - } - - free(target_str); - } - - } else { - is_ide = B_TRUE; - } - - if ((error == 0) && (device_str != NULL)) { - - /* get controller/hba/channel */ - nchars = strlen(device_str); - if ((sscanf(device_str, "c%u%n", &ctrl, &nscan) != 1) || - (nscan != nchars)) { - - error = -1; - oprintf(OUTPUT_DEBUG, - gettext("no channel/HBA component " - "in device name \"%s\".\n"), - name); - - } else { - got_ctrl = B_TRUE; - } - } - - free(ctd_str); - - if (error == 0) { - - /* allocate the ctd_spec_t struct and store the ids */ - (*spec)->type = SPEC_TYPE_CTD; - (*spec)->data.ctd = (ctd_spec_t *)calloc(1, sizeof (ctd_spec_t)); - - if ((*spec)->data.ctd == NULL) { - error = ENOMEM; - } - - (*spec)->data.ctd->slice = ID_UNSPECIFIED; - (*spec)->data.ctd->lun = ID_UNSPECIFIED; - (*spec)->data.ctd->target = ID_UNSPECIFIED; - (*spec)->data.ctd->ctrl = ID_UNSPECIFIED; - - if (got_slice == B_TRUE) { - (*spec)->data.ctd->slice = slice; - } - - if (got_lun == B_TRUE) { - (*spec)->data.ctd->lun = lun; - } - - if (got_target == B_TRUE) { - (*spec)->data.ctd->target = target; - } - - if (got_ctrl == B_TRUE) { - (*spec)->data.ctd->ctrl = ctrl; - } - - (*spec)->data.ctd->is_ide = is_ide; - } - - return (error); -} - -/* - * FUNCTION: create_device_raw_spec(char *name, device_spec_t **spec); - * - * INPUT: name - pointer to a char* device name - * - * OUTPUT: spec - pointer to a device_spec_t updated with the raw spec - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Function which creates a "raw" spec for the input name. - * - * This is a last resort if all other spec parsing schemes failed, - * the "raw" spec is just the input device name. - */ -static int -create_device_raw_spec( - char *name, - device_spec_t **spec) -{ - int error = 0; - char *ctd_str = strdup(name); - - if (ctd_str == NULL) { - return (ENOMEM); - } - - (*spec)->type = SPEC_TYPE_RAW; - (*spec)->data.raw = ctd_str; - - oprintf(OUTPUT_DEBUG, - gettext("made raw device spec for \"%s\"\n"), ctd_str); - - return (error); -} - -/* - * FUNCTION: get_spec_for_name(char *name, device_spec_t **id); - * - * INPUT: name - pointer to a char* device name - * - * OUTPUT: id - pointer to a device_spec_t to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Function which finds the device_spec_t that already - * exists for the input name or creates it. - * - * The returned struct should not be freed, it is maintained - * in a cache that will be purged when the layout process - * is complete. - */ -int -get_spec_for_name( - char *name, - device_spec_t **id) -{ - dlist_t *item; - int error = 0; - - item = dlist_find(_spec_cache, (void *)name, - compare_name_to_spec_cache_name); - - if (item == NULL) { - if ((error = create_device_spec(name, id)) == 0) { - - spec_cache_t *entry = (spec_cache_t *) - calloc(1, sizeof (spec_cache_t)); - - if (entry == NULL) { - destroy_device_spec(*id); - error = ENOMEM; - } else { - char *dup = strdup(name); - if (dup == NULL) { - free(entry); - destroy_device_spec(*id); - *id = NULL; - error = ENOMEM; - } else { - entry->name = dup; - entry->device_spec = *id; - } - - if (error == 0) { - dlist_t *item = dlist_new_item((void *)entry); - if (item == NULL) { - free(entry); - destroy_device_spec(*id); - *id = NULL; - error = ENOMEM; - } else { - _spec_cache = - dlist_append(item, _spec_cache, AT_HEAD); - } - } - } - } - } else { - *id = ((spec_cache_t *)item->obj)->device_spec; - } - - return (error); -} - -/* - * FUNCTION: spec_includes_device(device_spec_t *spec, - * device_spec_t *device) - * - * INPUT: spec - pointer to a device_spec struct - * device - pointer to a device_spec struct - * - * RETURNS: boolean_t - B_TRUE if the device is included in the spec - * B_FALSE otherwise - * - * PURPOSE: Function which determines if the input device matches the - * input spec. - * - * If both specs are of the same type, the appropriate - * comparison function is called. - * - * If the two specs are of different types, no comparison - * is done and B_FALSE is returned. - */ -boolean_t -spec_includes_device( - device_spec_t *spec, - device_spec_t *device) -{ - if ((spec->type == SPEC_TYPE_CTD) && (device->type == SPEC_TYPE_CTD)) { - return (ctd_spec_includes_device(spec, device)); - } else if ((spec->type == SPEC_TYPE_RAW) && - (device->type == SPEC_TYPE_RAW)) { - return (raw_spec_includes_device(spec, device)); - } - - return (B_FALSE); -} - -/* - * FUNCTION: ctd_spec_includes_device(device_spec_t *spec, - * device_spec_t *device) - * - * INPUT: spec - pointer to a device_spec struct - * device - pointer to a device_spec struct - * - * RETURNS: boolean_t - B_TRUE if the device is included in the spec - * B_FALSE otherwise - * - * PURPOSE: Function which determines if the input CTD device spec - * matches the input CTD spec. - * - * The device_spec_t structs contain component "ids" for - * both the specification and the device. - * - * The device must match each of the ids in the spec that - * are specified. - * - * spec devices matched - * -------------------------------------------------------- - * cX cX, cXtX, cXtXdX, cXtXdXsX, cXdX, cXdXsX - * cXtX cXtX, cXtXdX, cXtXdXsX - * cXtXdX cXtXdX, cXtXdXsX - * cXtXdXsX cXtXdXsX - * cXdX cXdX, cXdXsX - * cXdXsX cXdXsX - */ -static boolean_t -ctd_spec_includes_device( - device_spec_t *spec, - device_spec_t *device) -{ - boolean_t match = B_FALSE; - - if (spec->data.ctd->is_ide) { - - /* valid IDE names are cX, cXdX, cXdXsX, no target */ - - if ((spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (spec->data.ctd->lun != ID_UNSPECIFIED) && - (spec->data.ctd->slice != ID_UNSPECIFIED)) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl) && - (spec->data.ctd->lun == device->data.ctd->lun) && - (spec->data.ctd->slice == device->data.ctd->slice); - - } else if ((spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (spec->data.ctd->lun != ID_UNSPECIFIED)) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl) && - (spec->data.ctd->lun == device->data.ctd->lun); - - } else if (spec->data.ctd->ctrl != ID_UNSPECIFIED) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl); - - } - - } else { - - /* valid names are cX, cXtX, cXtXdX, cXtXdXsX */ - - if ((spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (spec->data.ctd->target != ID_UNSPECIFIED) && - (spec->data.ctd->lun != ID_UNSPECIFIED) && - (spec->data.ctd->slice != ID_UNSPECIFIED)) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl) && - (spec->data.ctd->target == device->data.ctd->target) && - (spec->data.ctd->lun == device->data.ctd->lun) && - (spec->data.ctd->slice == device->data.ctd->slice); - - } else if ((spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (spec->data.ctd->target != ID_UNSPECIFIED) && - (spec->data.ctd->lun != ID_UNSPECIFIED)) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl) && - (spec->data.ctd->target == device->data.ctd->target) && - (spec->data.ctd->lun == device->data.ctd->lun); - - } else if ((spec->data.ctd->ctrl != ID_UNSPECIFIED) && - (spec->data.ctd->target != ID_UNSPECIFIED)) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl) && - (spec->data.ctd->target == device->data.ctd->target); - - } else if (spec->data.ctd->ctrl != ID_UNSPECIFIED) { - - match = (spec->data.ctd->ctrl == device->data.ctd->ctrl); - - } - } - - oprintf(OUTPUT_DEBUG, - gettext("spec: c(%d) t(%d) d(%d) s(%d) " - "%s: c(%d) t(%d) d(%d) s(%d)\n"), - spec->data.ctd->ctrl, spec->data.ctd->target, - spec->data.ctd->lun, spec->data.ctd->slice, - (match ? gettext("includes") : gettext("does not include")), - device->data.ctd->ctrl, device->data.ctd->target, - device->data.ctd->lun, device->data.ctd->slice); - - return (match); -} - -/* - * FUNCTION: raw_spec_includes_device(device_spec_t *spec, - * device_spec_t *device) - * - * INPUT: spec - pointer to a device_spec struct - * device - pointer to a device_spec struct - * - * RETURNS: boolean_t - B_TRUE if the device is included in the spec - * B_FALSE otherwise - * - * PURPOSE: Function which determines if the input raw device spec - * matches the input spec. - * - * The device_spec_t raw elements are checked. - * - * If the spec's raw device name is exactly contained at the - * beginning of the device spec's raw name, then the function - * evaluates to true. - */ -static boolean_t -raw_spec_includes_device( - device_spec_t *spec, - device_spec_t *device) -{ - return (strncasecmp(spec->data.raw, - device->data.raw, strlen(spec->data.raw)) == 0); -} - -/* - * FUNCTION: compare_name_to_spec_cache_name(void *name, void *list_item) - * - * INPUT: name - opaque pointer to a char * device name - * list_item - opaque pointer to a spec_cache_t entry - * - * RETURNS: int - 0 - if request is the same as list_item->request - * !0 - otherwise - * - * PURPOSE: dlist_t helper which compares the input device name - * to the list_item's device name for equality. - * - * This function is the lookup mechanism for the device_spec - * associated with the name. - */ -static int -compare_name_to_spec_cache_name( - void *name, - void *list_item) -{ - spec_cache_t *entry = (spec_cache_t *)list_item; - - assert(name != NULL); - assert(entry != NULL); - - return (string_case_compare((char *)name, entry->name)); -} - -/* - * FUNCTION: destroy_spec_cache_entry(void *entry) - * - * INPUT: entry - opaque pointer to a spec_cache_t - * - * RETURNS: nothing - * - * PURPOSE: Function which reclaims memory allocated to a - * spec_cache_t entry. - * - * Frees memory allocated to hold the CTD name and the - * corresponding device_spec_t. - */ -static void -destroy_spec_cache_entry( - void *obj) -{ - spec_cache_t *entry = (spec_cache_t *)obj; - - if (entry != NULL) { - free(entry->name); - destroy_device_spec(entry->device_spec); - free(entry); - } -} - -/* - * FUNCTION: destroy_spec_cache() - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Function which destroys all entries in the device_spec - * cache. - */ -static int -destroy_spec_cache() -{ - dlist_free_items(_spec_cache, destroy_spec_cache_entry); - _spec_cache = NULL; - - return (0); -} - -/* - * FUNCTION: get_device_access_name(devconfig_t *request, - * dm_descriptor_t desc, char **name) - * - * INPUT: request - a devconfig_t request - * desc - a dm_descriptor_t device handle - * - * OUTPUT: name - a char * pointer to hold the preferred name - * - * RETURNS: int - 0 - if request is the same as list_item->request - * !0 - otherwise - * - * PURPOSE: Utility function to determine which of the possible device - * names should be used to access a known available device. - * - * Devices handled are slices and disks. - * - * If the input device is a multipathed disk or slice, it - * can have several possible names. Determine which of the - * names should be used based on the input request's available - * or unavailable device specifications. - * - */ -int -get_device_access_name( - devconfig_t *request, - dm_descriptor_t desc, - char **name) -{ - int error = 0; - boolean_t avail = B_FALSE; - dlist_t *aliases = NULL; - - assert(desc != (dm_descriptor_t)0); - - *name = NULL; - - if ((error = get_display_name(desc, name)) != 0) { - return (error); - } - - if (is_did_name(*name) == B_TRUE) { - oprintf(OUTPUT_DEBUG, - gettext("device DID name %s is preferred\n"), - *name); - return (0); - } - - error = is_named_device_avail(request, *name, B_FALSE, &avail); - if (error != 0) { - return (error); - } - - if (avail == B_TRUE) { - oprintf(OUTPUT_DEBUG, - gettext("device name %s is accessible\n"), - *name); - return (0); - } - - /* search aliases for an 'available' name, prefer DID names */ - if ((error = get_aliases(desc, &aliases)) == 0) { - - dlist_t *iter = aliases; - char *availname = NULL; - char *didname = NULL; - - for (; (iter != NULL) && (error == 0); iter = iter->next) { - - char *alias = (char *)iter->obj; - error = is_named_device_avail(request, alias, B_FALSE, &avail); - - if ((error == 0) && (avail == B_TRUE)) { - oprintf(OUTPUT_DEBUG, - gettext("device alias %s is accessible for %s\n"), - alias, *name); - - availname = alias; - - if (is_did_name(availname) == B_TRUE) { - didname = alias; - break; - } - } - } - - if (error == 0) { - if (didname != NULL) { - *name = didname; - } else if (availname != NULL) { - *name = availname; - } - } - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_request.h b/usr/src/cmd/lvm/metassist/layout/layout_request.h deleted file mode 100644 index 0388e59752bc..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_request.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_REQUEST_H -#define _LAYOUT_REQUEST_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "libdiskmgt.h" - -#include "volume_dlist.h" -#include "volume_defaults.h" -#include "volume_devconfig.h" - -/* XXX these are really in layout.c */ -extern int string_case_compare(char *str1, char *str2); -extern int add_modified_disk(devconfig_t *request, dm_descriptor_t diskx); -extern int add_to_hsp_list(dlist_t *devices); - -extern int release_request_caches(); - -extern int set_request_diskset(char *disksset); -extern char *get_request_diskset(); -extern void unset_request_diskset(); - -extern int set_toplevel_request(devconfig_t *request); -extern void unset_toplevel_request(); - -extern int set_request_defaults(defaults_t *defaults); -extern void unset_request_defaults(); - -extern int get_device_access_name( - devconfig_t *request, - dm_descriptor_t desc, - char **name); - -/* - * get list of HBAs, disks or slices that are available - * to satisfy the given request - */ -extern int slice_is_available( - char *name, - devconfig_t *request, - boolean_t *bool); - -extern int disks_get_avail_slices( - devconfig_t *request, - dlist_t *disks, - dlist_t **slices); - -extern int select_hbas_with_n_disks( - devconfig_t *request, - dlist_t *hbas, - int mindisks, - dlist_t **selhbas, - dlist_t **seldisks); - -extern int hba_get_avail_disks_and_space( - devconfig_t *request, - dm_descriptor_t hba, - dlist_t **list, - uint64_t *space); - -/* - * get lists of HBAs and disks that are used by volumes - */ -extern int get_hbas_and_disks_used_by_volumes( - dlist_t *volumes, - dlist_t **hbas, - dlist_t **disks); - -extern int get_hbas_and_disks_used_by_volume( - devconfig_t *volume, - dlist_t **hbas, - dlist_t **disks); - -/* - * accessors to get user-settable device parameters, - * values come from either the request or the diskset - * or global defaults - */ -extern int get_stripe_min_comp( - devconfig_t *request, - uint16_t *val); - -extern int get_stripe_max_comp( - devconfig_t *request, - uint16_t *val); - -extern int get_stripe_interlace( - devconfig_t *request, - uint64_t *val); - -extern int get_mirror_read_strategy( - devconfig_t *request, - mirror_read_strategy_t *val); - -extern int get_mirror_write_strategy( - devconfig_t *request, - mirror_write_strategy_t *val); - -extern int get_mirror_pass( - devconfig_t *request, - uint16_t *val); - -extern int get_mirror_nsubs( - devconfig_t *request, - uint16_t *val); - -extern int get_volume_faultrecov( - devconfig_t *request, - boolean_t *val); - -extern int get_volume_redundancy_level( - devconfig_t *request, - uint16_t *val); - -extern int get_volume_npaths( - devconfig_t *request, - uint16_t *val); - -extern int get_default_hsp_name( - devconfig_t *req, - char **name); - -extern int get_disks_for_target( - char *name, - dlist_t **disks); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_REQUEST_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_slice.c b/usr/src/cmd/lvm/metassist/layout/layout_slice.c deleted file mode 100644 index ae36f967458d..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_slice.c +++ /dev/null @@ -1,2336 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#include "volume_string.h" - -#include "volume_devconfig.h" -#include "volume_error.h" -#include "volume_dlist.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_messages.h" -#include "layout_request.h" -#include "layout_slice.h" - -#define _LAYOUT_SLICE_C - -static int pick_from_best_hba_and_disk( - dlist_t *list, - dlist_t *used, - dm_descriptor_t *chosen); - -static int slice_has_same_disk_geom( - dm_descriptor_t slice, - dlist_t *used, - boolean_t *bool); - -static int slice_on_unique_disk( - dm_descriptor_t slice, - dlist_t *used, - dlist_t *othervols, - boolean_t *bool); - -static int slice_on_unique_hba( - dm_descriptor_t slice, - dlist_t *used, - dlist_t *othervols, - boolean_t *bool); - -static int slice_on_similar_bus( - dm_descriptor_t slice, - dlist_t *used, - boolean_t *bool); - -static int slice_has_n_paths( - dm_descriptor_t slice, - uint16_t npaths, - boolean_t *bool); - -static int compare_modslice_names( - void *obj1, - void *obj2); - -static int compare_string_to_modslice_name( - void *str, - void *modslice); - -static int create_new_slice( - dm_descriptor_t oslice, - uint64_t nbytes, - boolean_t add_extra_cyl, - devconfig_t **nslice); - -static int create_modified_slice( - dm_descriptor_t oslice, - char *oname, - uint32_t oindex, - uint64_t ostart, - uint64_t osize, - uint64_t bps, - char *nname, - uint32_t nindex, - uint64_t nsize, - devconfig_t **nslice); - -/* - * list to track resized slices - */ -static dlist_t *_modified_slices = NULL; - -/* - * struct to track used slices and their disks... - */ -typedef struct { - char *slicename; - dm_descriptor_t disk; -} usedslice_t; - -/* - * list to of usedslice_t to track slices that have been - * used for any reason. - */ -static dlist_t *_used_slices = NULL; - -static int add_used_slice_list_entry(char *slicename, dm_descriptor_t disk); -static int compare_usedslice_name_to_string(void *obj1, void *obj2); -static void free_used_slice(void *obj); - -/* - * list of slices reserved to be used for explicit - * volume requests - */ -static dlist_t *_rsvd_slices = NULL; - -/* - * list of slices needing to be removed (zeroed out) prior to - * applying any metassist modifications to the system. - */ -static dlist_t *_rmvd_slices = NULL; - -/* - * FUNCTION: choose_slice( - * uint64_t nbytes, - * uint16_t npaths, - * dlist_t *slices, - * dlist_t *used, - * dlist_t *used_hbas, - * dlist_t *used_disks, - * boolean_t unused_disk, - * boolean_t nbytes_is_min, - * boolean_t add_extra_cyl, - * devconfig_t **chosen) - * - * INPUT: nbytes - required size - * npaths - minimum required data paths - * *slices - slices from which to choose - * *used - slices used by the volume under construction - * *used_hbas - hbas used by other volumes relevant to - * the volume under construction - * *used_disks - disks used by other volumes relevant to - * the volume under construction - * unused_disk - if true, the chosen slice must be from an - * unused disk - * nbytes_is_min - if true, the chosen slice may be larger than - * nbytes. - * add_extra_cyl - passed to create_new_slice, see comment there. - * **chosen - pointer to hold the chosen slice - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Choosen a slice from the list of those available. - * - * Of those available, choose in order of preference: - * - * - one on a unique HBA and disk that is of the exact size - * - one on a unique HBA and disk that is of sufficient size - * - one on unique HBA that is of the exact size - * - one on unique HBA that is of sufficient size - * - one on unique disk that is of the exact size - * - one on unique disk that is of sufficient size - * - one on any HBA that is of exact size - * - one on any HBA that is of sufficient size - * - one on a unique HBA that is the largest size - * - one on a unique disk that is the largest size - * - one on any HBA that is the largest size - * - * The function scans the available slices and builds lists of - * those meeting the criteria above. After the scan is complete, - * the lists are examined in order, the first non-empty list is - * chosen. If there are several possibilities in the chosen list, - * see if it is possible select the slice from the least used HBA - * and/or disk. - * - * If nbytes_is_min is true, the returned slice will be - * at least nbytes in capacity. - * - * If unused_disk is true, the returned slice will be from - * a disk with no other known uses. - */ -int -choose_slice( - uint64_t nbytes, - uint16_t npaths, - dlist_t *slices, - dlist_t *used, - dlist_t *used_hbas, - dlist_t *used_disks, - boolean_t unused_disk, - boolean_t nbytes_is_min, - boolean_t add_extra_cyl, - devconfig_t **chosen) -{ - dlist_t *iter = NULL; - - dm_descriptor_t slice = NULL; - boolean_t resize = B_FALSE; - boolean_t verbose = (get_max_verbosity() == OUTPUT_VERBOSE); - - int error = 0; - - /* - * indexes into the list array: - * i -> unique controller 0 = yes, 1 = no - * j -> same bus type 0 = yes, 1 = no - * k -> unique disk 0 = yes, 1 = no - * l -> same disk geom 0 = yes, 1 = no - * m -> size 0 == exact, 1 = larger, 2 = any - */ - int i, j, k, l, m; - dlist_t *list[2][2][2][2][3]; - - /* output string arrays for each array dimension and index */ - char *uniqhba[2]; - char *samebus[2]; - char *uniqdisk[2]; - char *samegeom[2]; - char *sizes[3]; - - /* other output strings */ - char *look_msg = NULL; - char *npaths_msg = NULL; - char *samegeom_msg = NULL; - char *samebus_msg = NULL; - char *uniqhba_msg = NULL; - char *uniqdisk_msg = NULL; - char *exact_msg = NULL; - char *larger_msg = NULL; - char *smaller_msg = NULL; - char *insuff_paths = NULL; - char *too_small = NULL; - char *useddisk_msg = NULL; - - if (verbose == B_TRUE) { - /* only initialize the output strings if needed */ - - /* BEGIN CSTYLED */ - look_msg = gettext( - "\tlooking at slice: %s (%s)\n"); - npaths_msg = gettext( - "\t has the requested number of data paths (%d)\n"); - samegeom_msg = gettext( - "\t has the same disk geometry relative to used slices\n"); - samebus_msg = gettext( - "\t on a similar I/O bus/HBA relative to used slices\n"); - uniqhba_msg = gettext( - "\t on a unique HBA relative to used slices\n"); - uniqdisk_msg = gettext( - "\t on a unique disk relative to used slices\n"); - exact_msg = gettext( - "\t the exact size necessary\n"); - larger_msg = gettext( - "\t larger than necessary\n"); - smaller_msg = gettext( - "\t smaller than necessary\n"); - insuff_paths = gettext( - "\t rejected: not enough paths (%d requested)\n"); - too_small = gettext( - "\t rejected: too small\n"); - useddisk_msg = gettext( - "\t rejected: on a disk with other volume component(s)\n"); - - uniqhba[0] = gettext("unique HBA"); - uniqhba[1] = gettext("non unique HBA"); - samebus[0] = gettext("same bus type"); - samebus[1] = gettext("different bus type"); - uniqdisk[0] = gettext("unique disk"); - uniqdisk[1] = gettext("non unique disk"); - samegeom[0] = gettext("same geometry"); - samegeom[1] = gettext("different geometry"); - sizes[0] = gettext("an exact size slice"); - sizes[1] = gettext("a larger slice"); - sizes[2] = gettext("a smaller slice"); - - /* END CSTYLED */ - } - - /* init list array pointers */ - (void) memset(list, 0, 2*2*2*2*3 * sizeof (dlist_t *)); - - for (iter = slices; - (iter != NULL) && (error == 0); iter = iter->next) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - uint64_t snbytes = 0; - boolean_t uniqdisk = B_FALSE; - boolean_t uniqhba = B_FALSE; - boolean_t samegeom = B_FALSE; - boolean_t samebus = B_FALSE; - boolean_t paths = B_FALSE; - dlist_t *item = NULL; - - ((error = slice_get_size(slice, &snbytes)) != 0) || - (error = slice_has_n_paths(slice, npaths, &paths)) || - (error = slice_on_unique_hba(slice, used, used_hbas, &uniqhba)) || - (error = slice_on_unique_disk(slice, used, used_disks, - &uniqdisk)) || - (error = slice_on_similar_bus(slice, used, &samebus)) || - (error = slice_has_same_disk_geom(slice, used, &samegeom)); - if (error != 0) { - continue; - } - - if (verbose == B_TRUE) { - char *sname = NULL; - char *sizestr = NULL; - (void) get_display_name(slice, &sname); - if (bytes_to_sizestr(snbytes, &sizestr, - universal_units, B_FALSE) == 0) { - oprintf(OUTPUT_VERBOSE, look_msg, sname, sizestr); - free(sizestr); - } - } - - if (npaths > 1) { - if (paths && verbose) { - /* specifically asked for more paths, ... */ - oprintf(OUTPUT_VERBOSE, npaths_msg); - } - } else if (npaths == 1) { - /* every disk has at least 1 path */ - paths = B_TRUE; - } - - if (verbose == B_TRUE) { - if (uniqhba) { - oprintf(OUTPUT_VERBOSE, uniqhba_msg); - } - if (uniqdisk) { - oprintf(OUTPUT_VERBOSE, uniqdisk_msg); - } - - if (used != NULL) { - if (samebus) { - oprintf(OUTPUT_VERBOSE, samebus_msg); - } - if (samegeom) { - oprintf(OUTPUT_VERBOSE, samegeom_msg); - } - } - - if (snbytes > nbytes) { - oprintf(OUTPUT_VERBOSE, larger_msg); - } else if (snbytes == nbytes) { - oprintf(OUTPUT_VERBOSE, exact_msg); - } else { - oprintf(OUTPUT_VERBOSE, smaller_msg); - } - } - - /* filter slices not meeting minimum criteria */ - if (nbytes_is_min && (snbytes < nbytes)) { - /* not large enough */ - if (verbose == B_TRUE) { - oprintf(OUTPUT_VERBOSE, too_small); - } - continue; - } - - if (paths == B_FALSE) { - /* not connected thru enough paths */ - if (verbose == B_TRUE) { - oprintf(OUTPUT_VERBOSE, insuff_paths, npaths); - } - continue; - } - - if (uniqdisk != B_TRUE && unused_disk == TRUE) { - /* not on a unique disk */ - if (verbose == B_TRUE) { - oprintf(OUTPUT_VERBOSE, useddisk_msg); - } - continue; - } - - /* map slice properties into array indices */ - i = (uniqhba ? 0 : 1); - j = (samebus ? 0 : 1); - k = (uniqdisk ? 0 : 1); - l = (samegeom ? 0 : 1); - m = (snbytes == nbytes ? 0 : (snbytes > nbytes ? 1 : 2)); - - /* - * insert slice into the list array using derived indices. - * NB: lists of slices larger than necessary are kept in - * ascending order (results in best fit, not worst fit) - */ - if ((item = dlist_new_item((void*)(uintptr_t)slice)) == NULL) { - error = ENOMEM; - } else { - list[i][j][k][l][m] = - dlist_insert_ordered( - item, - list[i][j][k][l][m], - (m == 1 ? ASCENDING : DESCENDING), - compare_slice_sizes); - } - } - - /* - * Select a slice from one of the lists. - * - * The list with the combination of lowest indices - * is the most preferred list... in rough order: - * - * one on a unique HBA and disk that is of the exact size - * one on a unique HBA and disk that is of sufficient size (resize) - * one on unique HBA that is of the exact size - * one on unique HBA that is of sufficient size (resize) - * one on unique disk that is of the exact size - * one on unique disk that is of sufficient size (resize) - * one on any HBA that is of exact size - * one on any HBA that is of sufficient size (resize) - * one on a unique HBA that is the largest size - * one on a unique disk that is the largest size - * one on any HBA that is the largest size - */ - slice = NULL; - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - for (k = 0; k < 2; k++) { - for (l = 0; l < 2; l++) { - for (m = 0; m < 3; m++) { - if (list[i][j][k][l][m] != NULL) { - - /* pick least used slice from this list */ - error = pick_from_best_hba_and_disk( - list[i][j][k][l][m], - used, &slice); - - resize = (m == 1); - - /* terminate all loops */ - goto stop; - } - } - } - } - } - } -stop: - - /* - * Slice chosen, is a resize necessary? - */ - if ((error == 0) && (slice != NULL)) { - - if (error == 0) { - if (verbose == B_TRUE) { - uint64_t snbytes = 0; - char *sname = NULL; - char *sizestr = NULL; - - (void) get_display_name(slice, &sname); - (void) slice_get_size(slice, &snbytes); - - if (bytes_to_sizestr(snbytes, &sizestr, - universal_units, B_FALSE) == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" selected %s (%s)\n" - " it is %s on a\n" - " %s (%s) and a\n" - " %s (%s)\n"), - sname, sizestr, - sizes[m], - uniqhba[i], samebus[j], - uniqdisk[k], samegeom[l]); - free(sizestr); - } - } - - if (resize) { - if (verbose == B_TRUE) { - oprintf(OUTPUT_VERBOSE, - gettext(" it has excess space, " - "resizing...\n")); - } - - error = create_new_slice(slice, nbytes, add_extra_cyl, - chosen); - if ((error == 0) && (*chosen != NULL) && verbose) { - oprintf(OUTPUT_VERBOSE, - gettext(" exactly resized\n")); - } - } - - if (error == 0) { - /* either no resize was necessary or the resize failed */ - if (*chosen == NULL) { - /* - * use the original slice as it is. - * Make a devconfig_t for it. - */ - error = create_devconfig_for_slice(slice, chosen); - } - } - } - } else if (slice == NULL) { - oprintf(OUTPUT_DEBUG, - gettext(" no possible slice\n")); - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - for (k = 0; k < 2; k++) { - for (l = 0; l < 2; l++) { - for (m = 0; m < 3; m++) { - if (list[i][j][k][l][m] != NULL) { - dlist_free_items(list[i][j][k][l][m], NULL); - } - } - } - } - } - } - - return (error); -} - -/* - * FUNCTION: create_devconfig_for_slice(dm_descriptor_t slice, - * devconfig_t **nslice) - * - * INPUT: slice - dm_descriptor_t handle to an existing slice - * nslice - devconfig_t pointer to hold the new slice - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Creates a devconfig_t struct representation of the input - * slice dm_descriptor. - */ -int -create_devconfig_for_slice( - dm_descriptor_t slice, - devconfig_t **nslice) -{ - uint64_t nbytes = 0; - uint64_t nblks = 0; - uint64_t stblk = 0; - uint32_t index = 0; - char *name = NULL; - int error = 0; - - ((error = get_display_name(slice, &name)) != 0) || - (error = slice_get_size(slice, &nbytes)) || - (error = slice_get_size_in_blocks(slice, &nblks)) || - (error = slice_get_start_block(slice, &stblk)) || - (error = slice_get_index(slice, &index)); - if (error != 0) { - return (error); - } - - ((error = new_devconfig(nslice, TYPE_SLICE)) != 0) || - (error = devconfig_set_name(*nslice, name)) || - (error = devconfig_set_slice_index(*nslice, index)) || - (error = devconfig_set_slice_start_block(*nslice, stblk)) || - (error = devconfig_set_size_in_blocks(*nslice, nblks)) || - (error = devconfig_set_size(*nslice, nbytes)); - if (error != 0) { - free_devconfig(*nslice); - } - - return (error); -} - -/* - * FUNCTION: make_slicename_for_disk_and_index(dm_descriptor_t disk, - * uint32_t index, char **slicename) - * - * INPUT: disk - a dm_descriptor_t disk handle - * index - a slice index - * - * OUTPUT slicename - a char * pointer to hold the resulting slicename - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Utility function to manufacture a new slice name given the - * "parent" disk and an available slice index. - * - * The caller should free the returned name when done with it. - */ -static int -make_slicename_for_disk_and_index( - dm_descriptor_t disk, - uint16_t index, - char **slicename) -{ - char *dname; - int error = 0; - - if ((error = get_display_name(disk, &dname)) == 0) { - error = make_slicename_for_diskname_and_index(dname, - index, slicename); - } - - return (error); -} - -/* - * FUNCTION: make_slicename_for_diskname_and_index(char *diskname, - * uint32_t index, char **slicename) - * - * INPUT: diskname - a char * disk name - * index - a slice index - * - * OUTPUT slicename - a char * pointer to hold the resulting slicename - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Utility function to manufacture a new slice name given the - * name of a disk and an available slice index. - * - * The caller should free the returned name when done with it. - */ -int -make_slicename_for_diskname_and_index( - char *diskname, - uint16_t index, - char **slicename) -{ - int error = 0; - char buf[MAXNAMELEN+1]; - - (void) snprintf(buf, sizeof (buf), "%ss%u", diskname, index); - if ((*slicename = strdup(buf)) == NULL) { - *slicename = NULL; - error = ENOMEM; - } - - return (error); -} - -/* - * FUNCTION: create_new_slice(dm_descriptor_t oslice, uint64_t nbytes, - * boolean_t add_extra_cyl, devconfig_t **nslice) - * - * INPUT: oslice - dm_descriptor_t handle to an existing slice - * nbytes - desired minimum size of the new slice - * add_extra_cyl - boolean indicating whether the resized slice - * needs to be oversized by 1 cylinder to account for - * interlace rounding done for stripe components. - * nslice - devconfig_t pointer to hold the new slice - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Creates a new slice object using space from the input slice. - * - * If there is an open slice slot in the disk VTOC, it will be - * reserved for the new slice. Space for the new slice will be - * taken from the original slice. - * - * If there is no open slice slot, the original slice will be - * returned as the usable new slice. - * - * The new slice will be of at least 'nbytes' bytes and possibly - * larger due to sector and cylinder boundary alignment. - * - * For EFI labeled disks, nbytes is rounded up to the next block - * boundary. - * - * For VTOC labeled disks, nbytes is rounded up to the next - * cylinder boundary. - * - * Additionally, if add_extra_cyl is true, the new slice will be - * made 1 cylinder larger than necessary. This accounts for the - * interlace rounding done within libmeta when computing the - * usable size of stripe components on disks with VTOC labels. - * Rounding the size up to the next cylinder boundary is not - * sufficient because libmeta will round this size down to an - * integral multiple of the stripe interlace and then round that - * result down to a cylinder boundary. This makes the usable - * size of the slice one cylinder smaller and possibly less than - * nbytes. Adding an extra cylinder ensures the usable size is - * greater than nbytes despite the rounding. - * - * If the resize is successful a pointer to the devconfig_t - * representing the new slice will be returned in "newslice". - * - * If the resize cannot be done, the newslice pointer will - * be NULL. - */ -static int -create_new_slice( - dm_descriptor_t oslice, - uint64_t nbytes, - boolean_t add_extra_cyl, - devconfig_t **nslice) -{ - dm_descriptor_t odisk = NULL; - boolean_t efi = B_FALSE; - - char *oname = NULL; - uint64_t osize = 0; /* orig size (bytes) */ - uint64_t ostart = 0; /* orig start (byte) */ - uint64_t ostblk = 0; /* orig start (blk) */ - uint64_t nsize = 0; /* new size (bytes) */ - uint64_t bytes_per_sect = 0; - - uint32_t oindex = 0; - uint32_t nindex = oindex; - - int error = 0; - - *nslice = NULL; - - ((error = slice_get_disk(oslice, &odisk)) != 0) || - (error = slice_get_index(oslice, &oindex)); - if (error != 0) { - return (error); - } - - /* find an unused slice number, default to oindex */ - nindex = oindex; - if ((error = disk_get_available_slice_index(odisk, &nindex)) != 0) { - return (error); - } - - ((error = get_display_name(oslice, &oname)) != 0) || - (error = slice_get_size(oslice, &osize)) || - (error = slice_get_start(oslice, &ostart)) || - (error = slice_get_start_block(oslice, &ostblk)) || - (error = disk_get_is_efi(odisk, &efi)) || - (error = disk_get_blocksize(odisk, &bytes_per_sect)); - if (error != 0) { - return (error); - } - - if (efi) { - - /* EFI: round size to an integral number of blocks (sectors) */ - nsize = bytes_per_sect * - ((nbytes + (bytes_per_sect - 1)) / bytes_per_sect); - - oprintf(OUTPUT_DEBUG, - gettext(" " - "rounded up to %10.2f blocks\n"), - (double)(nsize/bytes_per_sect)); - - } else { - - /* VTOC: round size to an integral number of cylinders */ - uint64_t nhead = 0; - uint64_t nsect = 0; - uint64_t ncyls = 0; - - ((error = disk_get_ncylinders(odisk, &ncyls)) != 0) || - (error = disk_get_nheads(odisk, &nhead)) || - (error = disk_get_nsectors(odisk, &nsect)); - if (error == 0) { - uint64_t bytes_per_cyl = nhead * nsect * bytes_per_sect; - nsize = bytes_per_cyl * - ((nbytes + (bytes_per_cyl - 1)) / bytes_per_cyl); - - if (add_extra_cyl == TRUE) { - nsize += bytes_per_cyl; - } - - oprintf(OUTPUT_DEBUG, - gettext(" " - "rounded VTOC slice to %10.2f cylinders " - "(out of %llu)\n"), - (double)(nsize/bytes_per_cyl), ncyls); - } - } - - /* is sufficient space still available? */ - if (error == 0) { - if (osize == nsize) { - /* use existing slice as is */ - ((error = create_devconfig_for_slice(oslice, nslice)) != 0) || - (error = disk_reserve_index(odisk, (uint16_t)nindex)); - } else if (osize > nsize) { - - if (nindex == oindex) { - /* no more slices, resize existing slice */ - ((error = create_devconfig_for_slice(oslice, - nslice)) != 0) || - (error = devconfig_set_size(*nslice, nsize)) || - (error = devconfig_set_size_in_blocks(*nslice, - nsize/bytes_per_sect)); - (error = disk_reserve_index(odisk, (uint16_t)nindex)); - - } else { - /* make a new slice */ - char *nname = NULL; - - ((error = make_slicename_for_disk_and_index(odisk, - nindex, &nname)) != 0) || - (error = create_modified_slice(oslice, oname, oindex, - ostart, osize, bytes_per_sect, nname, nindex, nsize, - nslice)) || - /* mark the new slice's index as used */ - (error = disk_reserve_index(odisk, (uint16_t)nindex)); - - if ((error != 0) && (*nslice == NULL)) { - free(nname); - } - } - } - } - - return (error); -} - -/* - * FUNCTION: create_modified_slice(dm_descriptor_t oslice, char *oname, - * uint32_t oindex, uint64_t ostart, uint64_t osize, - * uint64_t bytes_per_sect, uint64_t nsize, - * char *nname, uint32_t nindex, devconfig_t **nslice) - * - * INPUT: oslice - dm_descriptor_t handle for the original slice - * oname - existing source slice name - * oindex - existing source slice VTOC index - * ostart - existing source slice start byte - * osize - existing source slice size in bytes - * bytes_per_sect - bytes per block (sector) for the disk - * nname - new slice name - * nindex - new slice VTOC index - * nsize - new slice size in bytes (cylinder and block aligned) - * - * SIDEEFFECTS: updates the module private list of modified slices - * - * OUTPUT: nslice - pointer to a devconfig_t to hold the new slice - * - * PURPOSE: create a new VTOC slice by taking space from an - * existing slice. - * - * The input size for the new slice is expected to be - * cylinder aligned. - */ -static int -create_modified_slice( - dm_descriptor_t oslice, - char *oname, - uint32_t oindex, - uint64_t ostart, - uint64_t osize, - uint64_t bytes_per_sect, - char *nname, - uint32_t nindex, - uint64_t nsize, - devconfig_t **nslice) -{ - int error = 0; - - /* compute start sector and size in sectors for the new slice */ - - /* subtract nsize from original slice to get starting byte */ - uint64_t nstart = (ostart + osize) - nsize; - - /* convert starting byte to a sector */ - uint64_t nstblk = (uint64_t)(nstart / bytes_per_sect); - - /* convert nsize to an integral number of blocks (sectors) */ - uint64_t nblks = (uint64_t)(nsize / bytes_per_sect); - - /* create a modified slice record for the new slice */ - error = assemble_modified_slice(oslice, nname, nindex, - nstblk, nblks, nsize, nslice); - if (error != 0) { - free(nname); - return (error); - } - - /* update the existing source slice's new size */ - osize = osize - nsize; - (void) slice_set_size(oslice, osize); - - /* update/create the modified slice record gfor the source slice */ - error = assemble_modified_slice((dm_descriptor_t)0, - oname, oindex, (uint64_t)(ostart / bytes_per_sect), - (uint64_t)(osize / bytes_per_sect), - osize, NULL); - - return (error); -} - -/* - * FUNCTION: assemble_modified_slice(dm_descriptor_t src_slice, - * char *mod_name, uint32_t mod_index, - * uint64_t mod_stblk, uint64_t mod_nblks, - * uint64_t mod_size, devconfig_t **modslice) - * - * INPUT: src_slice - dm_descriptor_t handle of the slice space - * was taken from to create the modified slice - * mod_name - name of the modified slice - * mod_index - name of the modified slice - * mod_stblk - start block of the modified slice - * mod_nblks - size in blocks of the modified slice - * mod_size - size in bytes of the modified slice - * - * OUTPUT: mod_slice - if non-NULL, will be populated with a - * devconfig_t representing the modified slice. - * - * SIDEEFFECTS: adds or updates an entry in the modified slice list - * tracking the slices that have been explicitly modified - * by the layout code. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Utility function to which updates or creates a devconfig_t - * representing a slice that needs to be modified. - * - * If a modified slice record does not exist for the named - * slice, a new devconfig_t struct is allocated and added - * to the modified slice list. - * - * The existing or created devconfig_t struct is updated with - * the input values. - * - * The information about the slices in the modified slice list - * will eventually be handed to fmthard. - */ -int -assemble_modified_slice( - dm_descriptor_t src_slice, - char *mod_name, - uint32_t mod_index, - uint64_t mod_stblk, - uint64_t mod_nblks, - uint64_t mod_size, - devconfig_t **mod_slice) -{ - devconfig_t *slice = NULL; - modslice_t *mstp = NULL; - dlist_t *item = NULL; - int error = 0; - - /* see if the slice has been modified before */ - if ((item = dlist_find(_modified_slices, mod_name, - compare_string_to_modslice_name)) != NULL) { - - /* yes, update the resize count and attributes */ - mstp = (modslice_t *)item->obj; - slice = mstp->slice_devcfg; - - mstp->times_modified += 1; - mstp->src_slice_desc = src_slice; - - ((error = devconfig_set_slice_start_block(slice, - mod_stblk)) != 0) || - (error = devconfig_set_size(slice, mod_size)) || - (error = devconfig_set_size_in_blocks(slice, mod_nblks)); - - } else { - - /* no, first modification... */ - /* create a devconfig_t representing the new slice */ - ((error = new_devconfig(&slice, TYPE_SLICE)) != 0) || - (error = devconfig_set_name(slice, mod_name)) || - (error = devconfig_set_slice_index(slice, mod_index)) || - (error = devconfig_set_slice_start_block(slice, mod_stblk)) || - (error = devconfig_set_size_in_blocks(slice, mod_nblks)) || - (error = devconfig_set_size(slice, mod_size)); - if (error == 0) { - /* add to list of modified slices */ - if ((mstp = (modslice_t *) - calloc(1, sizeof (modslice_t))) != NULL) { - - /* count # of times source slice has been modified */ - if (src_slice != (dm_descriptor_t)0) { - mstp->times_modified = 0; - } else { - mstp->times_modified = 1; - } - mstp->src_slice_desc = src_slice; - mstp->slice_devcfg = slice; - - if ((item = dlist_new_item(mstp)) != NULL) { - _modified_slices = - dlist_insert_ordered( - item, - _modified_slices, - ASCENDING, - compare_modslice_names); - } else { - error = ENOMEM; - } - } else { - error = ENOMEM; - } - } - - if (error != 0) { - free_devconfig(mstp); - free_devconfig(slice); - } - } - - if (error == 0) { - oprintf(OUTPUT_DEBUG, - " " - "modified %s (start blk: %9llu, nblks: %9llu)\n", - mod_name, mod_stblk, mod_nblks); - - /* return devconfig_t for modified slice */ - if (mod_slice != NULL) { - *mod_slice = slice; - mstp->volume_component = B_TRUE; - } - } - - return (error); -} - -/* - * FUNCTION: dlist_t *get_modified_slices() - * - * RETURNS: pointer to the list of modslice_t structs representing - * modified slices - * - * PURPOSE: public accessor to the list of slices modified while - * processing a request. - */ -dlist_t * -get_modified_slices() -{ - return (_modified_slices); -} - -/* - * FUNCTION: free_modslice_object(void *obj) - * - * INPUT: obj - opaque pointer - * - * PURPOSE: Frees memory associated with a modslice_t struct. - */ -static void -free_modslice_object( - void *obj) -{ - assert(obj != (modslice_t *)NULL); - - if (((modslice_t *)obj)->slice_devcfg != NULL) { - if (((modslice_t *)obj)->volume_component != B_TRUE) { - free_devconfig(((modslice_t *)obj)->slice_devcfg); - } - } - - free(obj); -} - -/* - * FUNCTION: void release_modified_slices() - * - * INPUT: none - - * OUTPUT: none - - * - * PURPOSE: cleanup the module global list of slices modified - * while processing a request. - */ -int -release_modified_slices() -{ - dlist_free_items(_modified_slices, free_modslice_object); - _modified_slices = NULL; - - return (0); -} - -/* - * FUNCTION: destroy_new_slice(devconfig_t *dev) - * - * INPUT: dev - a devconfig_t pointer to a slice object - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Undoes slice creation done by create_new_slice(): - * - * release index - * remove from used_slices - * remove from modified_slices - * return space to source slice - * free memory - */ -int -destroy_new_slice( - devconfig_t *dev) -{ - dm_descriptor_t disk = NULL; - uint64_t size = 0; - uint16_t index = 0; - modslice_t *modified = NULL; - dlist_t *item = NULL; - char *name = NULL; - int error = 0; - - ((error = devconfig_get_name(dev, &name)) != 0) || - (error = devconfig_get_slice_index(dev, &index)) || - (error = devconfig_get_size(dev, &size)) || - (error = get_disk_for_named_slice(name, &disk)) || - (error = disk_release_index(disk, index)) || - (error = remove_used_slice_by_name(name)); - if (error != 0) { - return (error); - } - - /* remove from the modified_slices list */ - _modified_slices = - dlist_remove_equivalent_item( - _modified_slices, name, - compare_string_to_modslice_name, &item); - - if (item != NULL) { - modified = (modslice_t *)item->obj; - free((void*) item); - } - - /* space from an existing slice? if so reclaim it. */ - if (modified != NULL) { - - dm_descriptor_t src = modified->src_slice_desc; - char *srcname = NULL; - dlist_t *srcitem = NULL; - - if (src != (dm_descriptor_t)0) { - if ((error = get_display_name(src, &srcname)) == 0) { - srcitem = - dlist_find( - _modified_slices, - srcname, - compare_string_to_modslice_name); - } - } - - if ((error == 0) && (srcitem != NULL)) { - - modslice_t *source = (modslice_t *)srcitem->obj; - devconfig_t *srcdevcfg = NULL; - uint64_t srcsize = NULL; - uint64_t srcsizeblks = NULL; - uint64_t inblks = NULL; - - srcdevcfg = source->slice_devcfg; - source->times_modified -= 1; - - ((error = devconfig_get_size(srcdevcfg, &srcsize)) != 0) || - (error = devconfig_set_size(srcdevcfg, srcsize + size)) || - (error = slice_set_size(src, srcsize + size)) || - (error = slice_get_size_in_blocks(src, &srcsizeblks)) || - (error = devconfig_get_size_in_blocks(srcdevcfg, &inblks)); - (error = devconfig_set_size_in_blocks(srcdevcfg, srcsizeblks)); - - if (error == 0) { - - /* was only modification undone? */ - if (source->times_modified == 0) { - - _modified_slices = - dlist_remove_equivalent_item( - _modified_slices, srcname, - compare_string_to_modslice_name, - &srcitem); - - free_modslice_object((modslice_t *)srcitem->obj); - free((void *)srcitem); - } - } - } - - free_modslice_object(modified); - } - - return (error); -} - -/* - * FUNCTION: pick_from_best_hba_and_disk(dlist_t *slices, - * dlist_t *used, dm_descriptor_t *chosen) - * - * INPUT: slices - a dlist_t poitner to a list of slices - * used - a dlist_t pointer to a list of used slices - * chosen - a dm_descriptor_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Examines the input list of slices and chooses the one - * that is on the least used HBA and disk. - * - * HBA and disk usage is determined by examining the input - * list of used slices and counting the number of slices - * each HBA and disk contributes. - * - * The HBA which contributes the fewest is selected, and - * then the disk on that HBA which contributes the fewest - * is selected. - * - * The largest slice from that disk is then returned. - */ -static int -pick_from_best_hba_and_disk( - dlist_t *slices, - dlist_t *used, - dm_descriptor_t *chosen) -{ - dlist_t *iter = NULL; - dlist_t *iter1 = NULL; - dlist_t *iter2 = NULL; - dlist_t *item = NULL; - - dlist_t *used_slice_hbas = NULL; - - int maxuses = 128; - int maxslices = VTOC_SIZE; /* meta.h */ - - int i = 0; - int error = 0; - - /* - * allocate an array to hold lists of slices grouped by - * HBA contribution... the list indexed by N is the list - * of slices that are on HBAs contributing N slices - */ - dlist_t **prefhbas = (dlist_t **)calloc(maxuses, sizeof (dlist_t *)); - - /* - * allocate an array to hold lists of slices grouped by - * disk contribution... the list indexed by N is the list - * of slices that are on disks contributing N slices - */ - dlist_t **prefdisks = (dlist_t **)calloc(maxslices, sizeof (dlist_t *)); - - *chosen = (dm_descriptor_t)0; - - if (prefhbas == NULL || prefdisks == NULL) { - free(prefhbas); - free(prefdisks); - return (ENOMEM); - } - - /* - * precompute the used slices' lists of HBAS: iterate the list - * of used slices and determine the HBA(s) each is connected thru. - * construct a list of lists containing the HBAs. - */ - for (iter = used; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *uslice = (devconfig_t *)iter->obj; - dm_descriptor_t udisk = NULL; - char *uname = NULL; - dlist_t *uhbas = NULL; - - /* need to use disk to get to HBAs because */ - /* the slice doesn't exist yet */ - ((error = devconfig_get_name(uslice, &uname)) != 0) || - (error = get_disk_for_named_slice(uname, &udisk)) || - (error = disk_get_hbas(udisk, &uhbas)); - if (error == 0) { - if ((item = dlist_new_item((void *)uhbas)) == NULL) { - error = ENOMEM; - } else { - used_slice_hbas = dlist_append( - item, used_slice_hbas, AT_HEAD); - } - } - } - - /* - * iterate the list of chosen slices and for each, - * determine how many other slices from its HBA(s) - * are already being used... - * - * iter steps thru the list of slices - * iter1 steps thru each of the slice's HBAs - * iter2 steps thru the precomputed list of used slice's HBAs - * dlist_contains then searches each used slice's HBAs - * to see if it contains iter1's HBA - * - * If it does, increment the count for that HBA. - */ - for (iter = slices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - dlist_t *hbas = NULL; - int n = 0; /* # slices each HBA contributes */ - - if ((error = slice_get_hbas(slice, &hbas)) != 0) { - continue; - } - - for (iter1 = hbas; iter1 != NULL; iter1 = iter1->next) { - for (iter2 = used_slice_hbas; iter2 != NULL; - iter2 = iter2->next) { - - dlist_t *uhbas = (dlist_t *)iter2->obj; - if (dlist_contains(uhbas, iter1->obj, - compare_descriptor_names) == B_TRUE) { - n++; - } - } - } - - dlist_free_items(hbas, NULL); - - /* group slices from HBAs contributing more than maxuses */ - if (n >= maxuses) { - n = maxuses - 1; - } - - /* add slice to list in descending size order */ - if ((item = dlist_new_item((void*)(uintptr_t)slice)) == NULL) { - error = ENOMEM; - } else { - prefhbas[n] = - dlist_insert_ordered( - item, - prefhbas[n], - DESCENDING, - compare_slice_sizes); - } - } - - /* free list of lists of used slices HBAs */ - for (iter = used_slice_hbas; iter != NULL; iter = iter->next) { - dlist_free_items((dlist_t *)iter->obj, NULL); - } - dlist_free_items(used_slice_hbas, NULL); - - /* - * Select the list of slices that are on the HBA(s) contributing - * the fewest slices... iterate these slices and for each, detemmine - * how many other slices from its disk are already being used... - */ - for (i = 0; (i < maxuses) && (error == 0); i++) { - - for (iter = (dlist_t *)prefhbas[i]; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - dm_descriptor_t disk; - int n = 0; - - (void) slice_get_disk(slice, &disk); - - /* - * count how many slices this slice's disk is contributing - * by comparing it to the list of used slices - */ - for (iter1 = _used_slices; iter1 != NULL; iter1 = iter1->next) { - usedslice_t *used = (usedslice_t *)iter1->obj; - if (compare_descriptors((void *)(uintptr_t)disk, - (void *)(uintptr_t)used->disk) == 0) { - n++; - } - } - - /* add slice to list in descending size order */ - if ((item = dlist_new_item((void *)(uintptr_t)slice)) == NULL) { - error = ENOMEM; - } else { - prefdisks[n] = - dlist_insert_ordered( - item, - prefdisks[n], - DESCENDING, - compare_slice_sizes); - } - } - } - - if (error == 0) { - /* select largest slice from least used disk */ - for (i = 0; (i < maxslices) && (*chosen == NULL); i++) { - if (prefdisks[i] != NULL) { - *chosen = (uintptr_t)prefdisks[i]->obj; - } - } - } - - for (i = 0; i < maxuses; i++) { - dlist_free_items(prefhbas[i], NULL); - } - for (i = 0; i < maxslices; i++) { - dlist_free_items(prefdisks[i], NULL); - } - - free((void*)prefhbas); - free((void*)prefdisks); - - return (error); -} - -/* - * FUNCTION: slice_on_unique_hba(dm_descriptor_t slice, - * dlist_t *used, dlist_t *used_hbas, - * boolean_t *unique) - * - * INPUT: slice - a dm_descriptor_t handle for the slice of interest - * used - a dlist_t pointer to a list of used slices - * used_hbas - a dlist_t pointer to a list of used_hbas - * unique - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input slice is connected thru the same HBA - * as a slice in the used list. - * - * Also checks to see if the input slice is connected thru any - * HBA in the used_hbas list. - * - * If the slice is found to be on a unique HBA, bool is set - * to B_TRUE, B_FALSE otherwise. - */ -static int -slice_on_unique_hba( - dm_descriptor_t slice, - dlist_t *used, - dlist_t *used_hbas, - boolean_t *unique) -{ - dlist_t *iter = NULL; - dlist_t *iter1 = NULL; - - dlist_t *hbas = NULL; - - int error = 0; - - *unique = B_TRUE; - - if ((error = slice_get_hbas(slice, &hbas)) != 0) { - return (error); - } - - /* - * check to see if any of slice's HBAs is the same - * as the HBA for any of the used - */ - for (iter = used; - (iter != NULL) && (*unique == B_TRUE) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - if (devconfig_isA(dev, TYPE_SLICE)) { - - dm_descriptor_t odisk = NULL; - char *oname = NULL; - dlist_t *ohbas = NULL; - - /* get HBAs for other slice using its disk */ - /* because the slice doesn't exist yet. */ - ((error = devconfig_get_name(dev, &oname)) != 0) || - (error = get_disk_for_named_slice(oname, &odisk)) || - (error = disk_get_hbas(odisk, &ohbas)); - - /* any HBA overlap? */ - for (iter1 = hbas; - (iter1 != NULL) && (*unique == B_TRUE) && (error == 0); - iter1 = iter1->next) { - - if (dlist_contains(ohbas, iter1->obj, - compare_descriptor_names) == B_TRUE) { - *unique = B_FALSE; - } - } - dlist_free_items(ohbas, NULL); - } - } - - /* - * check to see if any of slice's HBAs is the contained - * in the list of used hbas - */ - for (iter = hbas; - (iter != NULL) && (*unique == B_TRUE) && (error == 0); - iter = iter->next) { - if (dlist_contains(used_hbas, - iter->obj, compare_descriptor_names) == B_TRUE) { - *unique = B_FALSE; - } - } - - dlist_free_items(hbas, NULL); - - return (error); -} - -/* - * FUNCTION: slice_on_unique_disk(dm_descriptor_t slice, - * dlist_t *used, dlist_t *used_disks, - * boolean_t *unique) - * - * INPUT: slice - a dm_descriptor_t handle for the slice of interest - * used - a dlist_t pointer to a list of used slices - * othervols - a dlist_t pointer to a list of other volumes - * bool - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input slice is on a drive that is not - * part of any volume in the othervols list, or on the same - * drive as any slice in the used list. - * - * If the slice is found to be on a unique disk, bool is set - * to B_TRUE, B_FALSE otherwise. - */ -static int -slice_on_unique_disk( - dm_descriptor_t slice, - dlist_t *used, - dlist_t *used_disks, - boolean_t *unique) -{ - dm_descriptor_t disk = NULL; - dlist_t *iter = NULL; - int error = 0; - - *unique = B_TRUE; - - if ((error = slice_get_disk(slice, &disk)) != 0) { - return (error); - } - - /* - * check to see if this disk is the same as the - * disk for any of the used - */ - for (iter = used; - (iter != NULL) && (*unique == B_TRUE) && (error == 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - - if (devconfig_isA(dev, TYPE_SLICE)) { - - /* get disk for otherslice */ - dm_descriptor_t odisk = NULL; - char *oname = NULL; - - ((error = devconfig_get_name(dev, &oname)) != 0) || - (error = get_disk_for_named_slice(oname, &odisk)); - - if ((error == 0) && - (compare_descriptor_names((void*)(uintptr_t)disk, - (void*)(uintptr_t)odisk) == 0)) { - /* origslice is on same disk, stop */ - *unique = B_FALSE; - } - } - } - - /* check disk against the used disks */ - if ((error == 0) && (*unique == B_TRUE) && - dlist_contains(used_disks, (void *)(uintptr_t)disk, - compare_descriptor_names) == B_TRUE) { - *unique = B_FALSE; - } - - return (error); -} - -/* - * FUNCTION: slice_has_same_disk_geom(dm_descriptor_t slice, - * dlist_t *used, boolean_t *has_same_geom) - * - * INPUT: slice - a dm_descriptor_t handle for the slice of interest - * used - a dlist_t pointer to a list of used slices - * bool - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input slice is on a drive with similar - * hardware geometry as the slices in the used list. - * - * If the slice is found to be on a disk with similar geometry, - * bool is set to B_TRUE, B_FALSE otherwise. - * - * The comparison is based on the available disk geometry - * information which may not be relevant or accurate for - * EFI labeled disks, so the disk drive type needs to be - * checked as well. - */ -static int -slice_has_same_disk_geom( - dm_descriptor_t slice, - dlist_t *used, - boolean_t *has_same_geom) -{ - dm_descriptor_t disk = NULL; - boolean_t efi = B_FALSE; - uint64_t bsize = 0; - uint64_t ncyls = 0; - uint64_t nsects = 0; - uint64_t nheads = 0; - dlist_t *iter = NULL; - int error = 0; - - *has_same_geom = B_TRUE; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = disk_get_is_efi(disk, &efi)) || - (error = disk_get_blocksize(disk, &bsize)); - - if ((error == 0) && (efi == B_FALSE)) { - ((error = disk_get_ncylinders(disk, &ncyls)) != 0) || - (error = disk_get_nheads(disk, &nheads)) || - (error = disk_get_nsectors(disk, &nsects)); - } - - if (error != 0) { - return (error); - } - - /* - * check to see if slice's disk has the same geometry - * as the disks for the slices in the used list - */ - for (iter = used; - (iter != NULL) && (*has_same_geom == B_TRUE) && (error = 0); - iter = iter->next) { - - devconfig_t *dev = (devconfig_t *)iter->obj; - - if (devconfig_isA(dev, TYPE_SLICE)) { - - /* get disk info for otherslice */ - dm_descriptor_t odisk = NULL; - char *oname = NULL; - boolean_t oefi = B_FALSE; - uint64_t obsize = 0; - uint64_t oncyls = 0; - uint64_t onsects = 0; - uint64_t onheads = 0; - - ((error = devconfig_get_name(dev, &oname)) != 0) || - (error = get_disk_for_named_slice(oname, &odisk)) || - (error = disk_get_is_efi(odisk, &oefi)) || - (error = disk_get_blocksize(odisk, &obsize)); - - if ((error == 0) && (oefi == B_FALSE)) { - ((error = disk_get_ncylinders(odisk, &oncyls)) != 0) || - (error = disk_get_nheads(odisk, &onheads)) || - (error = disk_get_nsectors(odisk, &onsects)); - } - - if (error == 0) { - if ((bsize != obsize) || (ncyls != oncyls) || - (nsects != onsects) || (nheads != onheads)) { - /* this disk has a different geometry */ - *has_same_geom = B_FALSE; - } - } - } - } - - return (error); -} - -/* - * FUNCTION: slice_on_similar_bus(dm_descriptor_t slice, - * dlist_t *used, boolean_t *on_smlr_bus) - * - * INPUT: slice - a dm_descriptor_t handle for the slice of interest - * used - a dlist_t pointer to a list of used slices - * bool - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input slice is connected thru a bus with - * characteristics similar to the slices in the used list. - * - * If the slice is found to be on a similar bus, bool is set - * to B_TRUE, B_FALSE otherwise. - * - * The comparison is actually between any of the HBA/controllers - * thru which the slices are connected to the system. - * If any are of similar type (e.g., fibre, SCSI) and - * protocol (SCSI-2, -3, fast/wide), then the slices are - * considered to be on similar busses. - */ -static int -slice_on_similar_bus( - dm_descriptor_t slice, - dlist_t *used, - boolean_t *on_smlr_bus) -{ - dlist_t *iter = NULL; - dlist_t *iter1 = NULL; - dlist_t *hbas = NULL; - int error = 0; - - /* if there are no used slices, then the bus is similar */ - *on_smlr_bus = B_TRUE; - if (dlist_length(used) == 0) { - return (0); - } - - (error = slice_get_hbas(slice, &hbas)); - if (error != 0) { - return (error); - } - - /* if there are used slices, then make sure the bus is similar */ - *on_smlr_bus = B_FALSE; - for (iter = hbas; - (iter != NULL) && (*on_smlr_bus == B_FALSE) && (error == 0); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - char *type = NULL; - boolean_t fast80 = B_FALSE; - boolean_t fast40 = B_FALSE; - boolean_t fast20 = B_FALSE; - boolean_t wide = B_FALSE; - - ((error = hba_get_type(hba, &type)) != 0) || - (error = hba_is_fast_80(hba, &fast80)) || - (error = hba_is_fast_40(hba, &fast40)) || - (error = hba_is_fast_20(hba, &fast20)) || - (error = hba_supports_wide(hba, &wide)); - if (error != 0) { - continue; - } - - /* check against the HBAs for the used slices */ - for (iter1 = used; - (iter1 != NULL) && (*on_smlr_bus == B_FALSE) && (error == 0); - iter1 = iter1->next) { - - devconfig_t *used = (devconfig_t *)iter1->obj; - - /* get HBAs for otherslice */ - dm_descriptor_t udisk = NULL; - char *uname = NULL; - dlist_t *uhbas = NULL; - dlist_t *iter2 = NULL; - - ((error = devconfig_get_name(used, &uname)) != 0) || - (error = get_disk_for_named_slice(uname, &udisk)) || - (error = disk_get_hbas(udisk, &uhbas)); - - for (iter2 = uhbas; - (iter2 != NULL) && (*on_smlr_bus == B_FALSE) && - (error == 0); - iter2 = iter2 ->next) { - - dm_descriptor_t uhba = (uintptr_t)iter2->obj; - char *utype = NULL; - boolean_t ufast80 = B_FALSE; - boolean_t ufast40 = B_FALSE; - boolean_t ufast20 = B_FALSE; - boolean_t uwide = B_FALSE; - - ((error = hba_get_type(uhba, &utype)) != 0) || - (error = hba_is_fast_80(uhba, &ufast80)) || - (error = hba_is_fast_40(uhba, &ufast40)) || - (error = hba_is_fast_20(uhba, &ufast20)) || - (error = hba_supports_wide(uhba, &uwide)); - - if (error == 0) { - /* check sync speed ? */ - if ((fast80 == ufast80) && (fast40 == ufast40) && - (fast20 == ufast20) && (wide == uwide) && - (type == utype)) { - *on_smlr_bus = B_TRUE; - } - } - } - dlist_free_items(uhbas, NULL); - } - } - - dlist_free_items(hbas, NULL); - - return (error); -} - -/* - * FUNCTION: slice_has_n_paths(dm_descriptor_t slice, - * uint16_t npaths, boolean_t *has_n_paths) - * INPUT: slice - a dm_descriptor_t handle for the slice of interest - * npaths - the number of paths desired - * has_n_paths - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determines if the input slice is connected via npaths. - * has_n_paths is set to B_TRUE if so, B_FALSE otherwise. - * - * In order for a disk to have multiple paths, MPXIO must - * be enabled and these conditions should hold: - * - * Slice will have one drive object. - * Drive will have one HBA (scsi_vhci) - * Drive will have one alias. - * Drive will have possibly > 1 paths. - * - * Getting the HBAs and aliases for the disk is relatively - * expensive, so they aren't checked. The actual number of - * paths is only checked if MPXIO is known to be enabled on - * the system and the input npaths is > 1. - */ -static int -slice_has_n_paths( - dm_descriptor_t slice, - uint16_t npaths, - boolean_t *has_n_paths) -{ - int error = 0; - - *has_n_paths = B_FALSE; - - if ((npaths > 1) && (is_mpxio_enabled() == B_TRUE)) { - - dm_descriptor_t disk = NULL; - dlist_t *paths = NULL; - - ((error = slice_get_disk(slice, &disk)) != 0) || - (error = disk_get_paths(disk, &paths)); - - if ((error == 0) && (dlist_length(paths) == npaths)) { - *has_n_paths = B_TRUE; - } - dlist_free_items(paths, NULL); - } - - return (error); -} - -/* - * FUNCTION: compare_string_to_modslice_name(void *str, void *modslice) - * - * INPUT: str - opaque char * pointer - * modslice - opaque modslice_t pointer - * - * RETURNS: int - <0 - if str < modslice->slice_devcfg.name - * 0 - if str == modslice->slice_devcfg.name - * >0 - if str > modslice->slice_devcfg.name - * - * PURPOSE: dlist_t helper which compares the input string to - * the name of a slice represented as modslice_t struct. - * - * Comparison is done via string_case_compare. - */ -static int -compare_string_to_modslice_name( - void *str, - void *modslice) -{ - char *name = NULL; - - assert(str != NULL); - assert(modslice != NULL); - - (void) devconfig_get_name( - ((modslice_t *)modslice)->slice_devcfg, &name); - - return (string_case_compare((char *)str, name)); -} - -/* - * FUNCTION: compare_modslice_names(void *obj1, void *obj2) - * - * INPUT: obj1 - opaque pointer - * obj2 - opaque pointer - * - * RETURNS: int - <0 - if obj1 name < obj2 name - * 0 - if obj1 name == obj2 name - * >0 - if obj1 name > obj2 name - * - * PURPOSE: dlist_t helper which compares the names of two slices - * represented as modslice_t structs. - * - * Comparison is done by string_case_compare - */ -static int -compare_modslice_names( - void *obj1, - void *obj2) -{ - char *name1 = NULL; - char *name2 = NULL; - - assert(obj1 != NULL); - assert(obj2 != NULL); - - (void) devconfig_get_name( - ((modslice_t *)obj1)->slice_devcfg, &name1); - (void) devconfig_get_name( - ((modslice_t *)obj2)->slice_devcfg, &name2); - - return (string_case_compare(name1, name2)); -} - -/* - * FUNCTION: release_used_slices() - * - * PURPOSE: Helper which cleans up the module private list of used - * slices. - */ -void -release_used_slices() -{ - dlist_free_items(_used_slices, free_used_slice); - _used_slices = NULL; -} - -static void -free_used_slice( - void *obj) -{ - if (obj != NULL) { - usedslice_t *used = (usedslice_t *)obj; - free(used->slicename); - free(used); - } -} - -/* - * FUNCTION: is_used_slice(dm_descriptor_t slice, boolean_t *is_used) - * - * INPUT: slice - a dm_descriptor_t slice handle - * - * OUTPUT: is_reserved - pointer to a boolean_t to hold the - * return result. - * - * PURPOSE: Helper which checks to see if the input slice - * is in the used_slice list. - * - * Check the input name against any used slice name or alias. - * is_used is set to B_TRUE if the input slice is already used, - * B_FALSE otherwise. - */ -int -is_used_slice( - dm_descriptor_t slice, - boolean_t *is_used) -{ - char *name; - int error = 0; - - if ((error = get_display_name(slice, &name)) == 0) { - *is_used = dlist_contains(_used_slices, (void *)name, - compare_usedslice_name_to_string); - } - - return (error); -} - -/* - * FUNCTIONS: add_used_slice(dm_descriptor_t slice) - * add_used_slice_by_name(char *slicename) - * add_used_slice_list_entry(char *slice) - * remove_used_slice_by_name(char *slicename) - * - * INPUT: diskset - a char * diskset name. - * slice - a dm_descriptor_t slice handle - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Access or maintain the list of used slices. - */ -int -add_used_slice( - dm_descriptor_t slice) -{ - dm_descriptor_t disk; - char *name; - int error = 0; - - assert(slice != (dm_descriptor_t)0); - - ((error = get_display_name(slice, &name)) != 0) || - (error = slice_get_disk(slice, &disk)) || - (error = add_used_slice_list_entry(name, disk)); - - return (error); -} - -int -add_used_slice_by_name( - char *slicename) -{ - dm_descriptor_t disk = (dm_descriptor_t)0; - int error = 0; - - assert(slicename != NULL); - - /* find disk for slice */ - error = get_disk_for_named_slice(slicename, &disk); - if (error == 0) { - error = add_used_slice_list_entry(slicename, disk); - } - - return (error); -} - -static int -add_used_slice_list_entry( - char *slicename, - dm_descriptor_t disk) -{ - usedslice_t *used = NULL; - int error = 0; - - assert(slicename != NULL); - assert(disk != (dm_descriptor_t)0); - - used = (usedslice_t *)calloc(1, sizeof (usedslice_t)); - if (used == NULL) { - error = ENOMEM; - } else { - - used->disk = disk; - if ((used->slicename = strdup(slicename)) == NULL) { - free(used); - error = ENOMEM; - } else { - dlist_t *item = dlist_new_item((void *) used); - if (item == NULL) { - free(used->slicename); - free(used); - error = ENOMEM; - } else { - _used_slices = - dlist_append(item, _used_slices, AT_HEAD); - } - } - } - return (error); -} - -int -remove_used_slice_by_name( - char *slice) -{ - dlist_t *removed = NULL; - - _used_slices = - dlist_remove_equivalent_item(_used_slices, (void *)slice, - compare_usedslice_name_to_string, &removed); - - if (removed != NULL) { - free_used_slice(removed->obj); - removed->obj = NULL; - free(removed); - } - - return (0); -} - -/* - * FUNCTION: compare_usedslice_name_to_string(void *obj1, void *obj2) - * INPUT: obj1 - opaque pointer - * obj2 - opaque pointer - * - * RETURNS: int - <0 - if obj1 name < obj2 name - * 0 - if obj1 name == obj2 name - * >0 - if obj1 name > obj2 name - * - * PURPOSE: dlist_t helper which compares the names of a slice - * represented as modslice_t struct to a string. - * - * obj1 is assumed to be a char * - * obj2 is assumed to be a usedslice_t * - * - * Comparison is done via string_case_compare. - */ -static int -compare_usedslice_name_to_string( - void *obj1, - void *obj2) -{ - assert(obj1 != NULL); - assert(obj2 != NULL); - - return (string_case_compare((char *)obj1, - ((usedslice_t *)obj2)->slicename)); -} - -/* - * FUNCTION: disk_has_used_slice(dm_descriptor_t disk, boolean_t *hasused) - * - * INPUT: disk - a dm_descriptor_t disk handle. - * inuse - a boolean_t pointer to hold the result - * - * RETURNS: int - 0 on success - * !0 othersize. - * - * PURPOSE: Determines if any of the known used slices is on the - * input disk. - */ -int -disk_has_used_slice( - dm_descriptor_t disk, - boolean_t *hasused) -{ - dlist_t *iter; - int error = 0; - - *hasused = B_FALSE; - for (iter = _used_slices; - (iter != NULL) && (*hasused == B_FALSE); - iter = iter->next) { - - usedslice_t *used = (usedslice_t *)iter->obj; - - /* compare used slice's disk to disk */ - if (compare_descriptors((void *)(uintptr_t)disk, - (void *)(uintptr_t)used->disk) == 0) { - *hasused = B_TRUE; - } - } - - return (error); -} - -/* - * FUNCTION: add_reserved_slice(dm_descriptor_t slice) - * - * INPUT: slice - a dm_descriptor_t slice handle - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which remembers specfically requested slices - * in a private list to ensure that the same slice isn't - * requested more than once. - * - * Does not check to see if the slice already exists - * in the list of reserved slices. Assumes that the - * caller has checked using is_reserved_slice(). - * - * The reserved slice list is used by several functions: - * - * 1. layout_validate.validate_slice_components() adds user - * requested slices to the list. - * - * 2. After all potentially usable slices have been scanned, - * layout_validate.validate_reserved_slices() checks the - * slices in the reserved and ensures that each slice is - * actually usable as a volume component. - * - * 3. layout.disk_get_avail_space(), layout.disk_get_avail_slices() - * exclude slices in the reserved list from being considered - * available for general layout use. - */ -int -add_reserved_slice( - dm_descriptor_t slice) -{ - dlist_t *item = NULL; - - if ((item = dlist_new_item((void *)(uintptr_t)slice)) == NULL) { - return (ENOMEM); - } - - _rsvd_slices = dlist_append(item, _rsvd_slices, AT_HEAD); - - return (0); -} - -/* - * FUNCTION: is_reserved_slice(dm_descriptor_t slice, - * boolean_t *is_reserved) - * - * INPUT: slice - a dm_descriptor_t slice handle - * - * OUTPUT: is_reserved - pointer to a boolean_t to hold the - * return result. - * - * PURPOSE: Helper which checks to see if the input slice - * was previously reserved. - * - * Check the input name against any reserved slice - * name or alias. is_reserved is set to B_TRUE if the - * input slice is already reserved, B_FALSE otherwise. - */ -int -is_reserved_slice( - dm_descriptor_t slice, - boolean_t *is_reserved) -{ - *is_reserved = dlist_contains(_rsvd_slices, - (void *)(uintptr_t)slice, compare_descriptor_names); - - return (0); -} - -/* - * FUNCTION: release_reserved_slice() - * - * PURPOSE: Helper which cleans up the module private list of reserved - * slices. - */ -void -release_reserved_slices() -{ - dlist_free_items(_rsvd_slices, free); - _rsvd_slices = NULL; -} - -/* - * FUNCTION: get_reserved_slices(dlist_t **list) - * - * OUTPUT: list - a dlist_t pointer to hold the returned list of - * reserverd slices. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Accessor to retrieve the current list of reserved slice - * dm_descriptor_t handles. - */ -int -get_reserved_slices( - dlist_t **list) -{ - *list = _rsvd_slices; - - return (0); -} - -/* - * FUNCTION: add_slice_to_remove(char *name, uint32_t index) - * - * INPUT: name - name of a slice - * index - index for the slice - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Utility function to add the named slice to the list of - * those that need to be "removed" by having their sizes - * set to 0. - */ -int -add_slice_to_remove( - char *name, - uint32_t index) -{ - rmvdslice_t *rmvd = NULL; - int error = 0; - - assert(name != NULL); - - rmvd = (rmvdslice_t *)calloc(1, sizeof (rmvdslice_t)); - if (rmvd == NULL) { - error = ENOMEM; - } else { - rmvd->slice_index = index; - if ((rmvd->slice_name = strdup(name)) == NULL) { - free(rmvd); - error = ENOMEM; - } else { - dlist_t *item = dlist_new_item((void *) rmvd); - if (item == NULL) { - free(rmvd->slice_name); - free(rmvd); - error = ENOMEM; - } else { - _rmvd_slices = - dlist_append(item, _rmvd_slices, AT_HEAD); - } - } - } - return (error); -} - -/* - * FUNCTION: get_removed_slices() - * - * RETURNS: dlist_t * - pointer to a list of rmvdslice_t structs - * - * PURPOSE: Accessor to retrieve the current list of names of slices - * to be removed. - */ -dlist_t * -get_slices_to_remove( - dlist_t **list) -{ - return (_rmvd_slices); -} - -static void -free_rmvd_slice( - void *obj) -{ - if (obj != NULL) { - rmvdslice_t *rmvd = (rmvdslice_t *)obj; - free(rmvd->slice_name); - free(rmvd); - } -} - -/* - * FUNCTION: release_removed_slices() - * - * PURPOSE: Helper which cleans up the module private list of removed - * slices. - */ -void -release_slices_to_remove() -{ - dlist_free_items(_rmvd_slices, free_rmvd_slice); - _rmvd_slices = NULL; -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_slice.h b/usr/src/cmd/lvm/metassist/layout/layout_slice.h deleted file mode 100644 index 5a3febdf653e..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_slice.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_SLICE_H -#define _LAYOUT_SLICE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "libdiskmgt.h" -#include "volume_devconfig.h" -#include "volume_dlist.h" - -/* - * struct to track which slices need to be explicitly "removed" from - * the system before applying any metassist updates/changes. - */ -typedef struct { - char *slice_name; - uint32_t slice_index; -} rmvdslice_t; - -extern void release_slices_to_remove(); -extern dlist_t *get_slices_to_remove(); -extern int add_slice_to_remove(char *name, uint32_t index); - -/* - * struct to track which slices have been explicitly modified - * during the layout process... - * - * src_slice_desc is the dm_descriptor_t of the slice which provided the - * space (this is only relevant to slices that have been created by - * taking space from some other "source" slice). - * slice_devconfig is the devconfig_t struct with the modified slice properties. - * times_modified is the number of times the slice has been modified - * (this is only relevant to slices that have been resized to - * provide space for new slices) - * volume_component is used to control when the slice_devcfg is freed. - * if volume_component is B_TRUE, the devconfig is returned as part - * of the result of layout and so cannot be freed by - * release_modified_slices. - */ -typedef struct { - dm_descriptor_t src_slice_desc; - devconfig_t *slice_devcfg; - int times_modified; - boolean_t volume_component; -} modslice_t; - -extern dlist_t *get_modified_slices(); -extern int release_modified_slices(); - -extern int make_slicename_for_diskname_and_index( - char *diskname, - uint16_t index, - char **slicename); - -extern int assemble_modified_slice( - dm_descriptor_t src_slice_desc, - char *mod_name, - uint32_t mod_index, - uint64_t mod_stblk, - uint64_t mod_nblks, - uint64_t mod_size, - devconfig_t **mod_slice); - -extern int choose_slice( - uint64_t nbytes, - uint16_t npaths, - dlist_t *slices, - dlist_t *used, - dlist_t *used_hbas, - dlist_t *used_disks, - boolean_t unused_disk, - boolean_t nbytes_is_min, - boolean_t add_extra_cyl, - devconfig_t **chosen); - -extern int create_devconfig_for_slice( - dm_descriptor_t slice, - devconfig_t **newslice); - -extern int destroy_new_slice( - devconfig_t *vol); - -/* - * accessors for the list of used slice names for named diskset. - */ -extern int is_used_slice(dm_descriptor_t slice, boolean_t *is_used); -extern int add_used_slice_by_name(char *slicename); -extern int remove_used_slice_by_name(char *slicename); -extern int add_used_slice(dm_descriptor_t slice); -extern void release_used_slices(); -extern int disk_has_used_slice(dm_descriptor_t disk, boolean_t *inuse); - -/* - * accessors to track slices reserved for use in explicit - * volume requests - */ -extern int add_reserved_slice(dm_descriptor_t slice); -extern int is_reserved_slice(dm_descriptor_t slice, boolean_t *is_rsvd); -extern int get_reserved_slices(dlist_t **list); -extern void release_reserved_slices(); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_SLICE_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_stripe.c b/usr/src/cmd/lvm/metassist/layout/layout_stripe.c deleted file mode 100644 index cfb4ed160e97..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_stripe.c +++ /dev/null @@ -1,1034 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#include - -#include "volume_error.h" -#include "volume_devconfig.h" -#include "volume_dlist.h" -#include "volume_output.h" - -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_messages.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_svm_util.h" - -#define _LAYOUT_STRIPE_C - -static int compose_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - int max, - int min, - dlist_t *othervols, - devconfig_t **stripe); - -static int compose_stripe_within_hba( - devconfig_t *request, - dlist_t *hbas, - uint64_t nbytes, - uint16_t min, - uint16_t max, - devconfig_t **stripe); - -static int assemble_stripe( - devconfig_t *request, - dlist_t *comps, - devconfig_t **stripe); - -static dlist_t * -order_stripe_components_alternate_hbas( - dlist_t *comps); - -static int compute_usable_stripe_capacity( - dlist_t *comps, - uint64_t ilace, - uint64_t *nbytes); - -/* - * FUNCTION: layout_stripe(devconfig_t *request, uint64_t nbytes, - * dlist_t **results) - * - * INPUT: request - pointer to a devconfig_t of the current request - * nbytes - the desired capacity of the stripe - * - * OUPUT: results - pointer to a list of composed volumes - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Main layout driver for composing stripe volumes. - * - * Attempts to construct a stripe of size nbytes. - * - * Basic goal of all strategies is to build wide-thin stripes: - * build widest stripe possible across as many HBAs as possible. - * - * Several different layout strategies are tried in order - * of preference until one succeeds or there are none left. - * - * 1 - stripe across similar HBAs - * . number of components is driven by # of HBAs - * . requires mincomp available HBAs - * - * 2 - stripe within a single HBA - * . number of components is driven by # of disks - * . requires at least 1 HBA with mincomp disks - * - * 3 - stripe across all available disks on similar HBAs - * . number of components is driven by # of disk - * . requires at least mincomp disks - * - * 4 - stripe across all available HBAs - * . number of components is driven by # of HBAs - * . requires at least mincomp HBAs - * - * 5 - stripe across all available disks on all HBAs - * . number of components is driven by # of disks - * . requires at least mincomp disks - * - * Each strategy tries to compose a stripe with the - * maximum number of components first then reduces the - * number of components down to mincomp. - * - * get allowed minimum number of stripe components - * get allowed maximum number of stripe components - * get available HBAs - * - * group HBAs by characteristics - * for (each HBA grouping) and (stripe not composed) { - * select next HBA group - * for (strategy[1,2,3]) and (stripe not composed) { - * compose stripe using HBAs in group - * } - * } - * - * if (stripe not composed) { - * for (strategy[4,5]) and (stripe not composed) { - * compose stripe using all HBAs - * } - * } - * - * if (stripe composed) { - * append composed stripe to results - * } - * - */ -int -layout_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t **results) -{ - /* - * these enums define the # of strategies and the preference order - * in which they are tried - */ - typedef enum { - STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER = 0, - STRIPE_WITHIN_SIMILAR_HBA, - STRIPE_ACROSS_SIMILAR_HBAS, - N_SIMILAR_HBA_STRATEGIES - } similar_hba_strategy_order_t; - - typedef enum { - STRIPE_ACROSS_ANY_HBAS_DISK_PER = 0, - STRIPE_ACROSS_ANY_HBAS, - N_ANY_HBA_STRATEGIES - } any_hba_strategy_order_t; - - - dlist_t *usable_hbas = NULL; - dlist_t *similar_hba_groups = NULL; - dlist_t *iter = NULL; - devconfig_t *stripe = NULL; - - uint16_t mincomp = 0; - uint16_t maxcomp = 0; - - int error = 0; - - (error = get_usable_hbas(&usable_hbas)); - if (error != 0) { - return (error); - } - - print_layout_volume_msg(devconfig_type_to_str(TYPE_STRIPE), nbytes); - - if (dlist_length(usable_hbas) == 0) { - print_no_hbas_msg(); - volume_set_error(gettext("There are no usable HBAs.")); - return (-1); - } - - ((error = group_similar_hbas(usable_hbas, &similar_hba_groups)) != 0) || - - /* - * determine the min/max number of stripe components - * based on the request, the diskset defaults or the - * global defaults. These are absolute limits, the - * actual values are determined by the number of HBAs - * and/or disks available. - */ - (error = get_stripe_min_comp(request, &mincomp)) || - (error = get_stripe_max_comp(request, &maxcomp)); - if (error != 0) { - return (error); - } - - for (iter = similar_hba_groups; - (error == 0) && (stripe == NULL) && (iter != NULL); - iter = iter->next) { - - dlist_t *hbas = (dlist_t *)iter->obj; - - similar_hba_strategy_order_t order; - - for (order = STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER; - (order < N_SIMILAR_HBA_STRATEGIES) && - (stripe == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case STRIPE_ACROSS_SIMILAR_HBAS_DISK_PER: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 1: use 1 disk from %d-%d similar HBAs - stripe across HBAs\n"), - mincomp, maxcomp); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= mincomp) { - n = ((n > maxcomp) ? maxcomp : n); - error = compose_stripe( - request, nbytes, disks, n, - mincomp, NULL, &stripe); - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case STRIPE_WITHIN_SIMILAR_HBA: - - error = select_hbas_with_n_disks( - request, hbas, mincomp, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 2: use %d-%d disks from any single HBA - stripe within HBA\n"), - mincomp, maxcomp); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) > 0) { - error = compose_stripe_within_hba( - request, selhbas, nbytes, - mincomp, maxcomp, &stripe); - } else { - print_insufficient_disks_msg(n); - } - } - - break; - - case STRIPE_ACROSS_SIMILAR_HBAS: - - error = select_hbas_with_n_disks( - request, hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 3: use %d-%d disks from %d similar HBAs - stripe across HBAs\n"), - mincomp, maxcomp, dlist_length(hbas)); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) > 0) { - if ((n = dlist_length(disks)) >= mincomp) { - n = ((n > maxcomp) ? maxcomp : n); - error = compose_stripe( - request, nbytes, disks, n, - mincomp, NULL, &stripe); - } else { - print_insufficient_disks_msg(n); - } - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - default: - break; - } - - dlist_free_items(disks, NULL); - dlist_free_items(selhbas, NULL); - } - } - - for (iter = similar_hba_groups; iter != NULL; iter = iter->next) { - dlist_free_items((dlist_t *)iter->obj, NULL); - } - dlist_free_items(similar_hba_groups, NULL); - - /* - * if striping within similar HBA groups failed, - * try across all available HBAs - */ - if ((stripe == NULL) && (error == 0)) { - - any_hba_strategy_order_t order; - - for (order = STRIPE_ACROSS_ANY_HBAS_DISK_PER; - (order < N_ANY_HBA_STRATEGIES) && - (stripe == NULL) && (error == 0); - order++) { - - dlist_t *selhbas = NULL; - dlist_t *disks = NULL; - int n = 0; - - switch (order) { - - case STRIPE_ACROSS_ANY_HBAS_DISK_PER: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 4: use 1 disk from %d-%d available HBAs - stripe across any HBAs\n"), - mincomp, maxcomp); -/* END CSTYLED */ - - if ((n = dlist_length(selhbas)) >= mincomp) { - - n = ((n > maxcomp) ? maxcomp : n); - error = compose_stripe( - request, nbytes, disks, n, - mincomp, NULL, &stripe); - - } else { - print_insufficient_hbas_msg(n); - } - } - - break; - - case STRIPE_ACROSS_ANY_HBAS: - - error = select_hbas_with_n_disks( - request, usable_hbas, 1, &selhbas, &disks); - - if (error == 0) { - -/* BEGIN CSTYLED */ -oprintf(OUTPUT_TERSE, -gettext(" -->Strategy 5: use %d-%d disks from %d available HBA - stripe across any HBAs\n"), - mincomp, maxcomp, dlist_length(selhbas)); -/* END CSTYLED */ - - if ((n = dlist_length(disks)) >= mincomp) { - - n = ((n > maxcomp) ? maxcomp : n); - error = compose_stripe( - request, nbytes, disks, n, - mincomp, NULL, &stripe); - - } else { - print_insufficient_disks_msg(n); - } - } - - break; - } - - dlist_free_items(disks, NULL); - dlist_free_items(selhbas, NULL); - } - } - - if (stripe != NULL) { - - dlist_t *item = NULL; - if ((item = dlist_new_item(stripe)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - print_layout_success_msg(); - } - - } else if (error != 0) { - - print_debug_failure_msg( - devconfig_type_to_str(TYPE_STRIPE), - get_error_string(error)); - - } else { - - print_insufficient_resources_msg( - devconfig_type_to_str(TYPE_STRIPE)); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: populate_stripe(devconfig_t *request, uint64_t nbytes, - * dlist_t *disks, uint16_t ncomp, dlist_t *othervols, - * devconfig_t **stripe) - * - * INPUT: request - pointer to a request devconfig_t - * nbytes - desired stripe size - * disks - pointer to a list of availalb disks - * ncomp - number of components desired - * othervols - pointer to a list of other volumes whose - * composition may affect this stripe - * (e.g., submirrors of the same mirror) - * - * OUTPUT: stripe - pointer to a devconfig_t to hold resulting stripe - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper to populate a stripe with the specified number of - * components and aggregate capacity using slices on disks - * in the input list. - * - * If the othervols list is not empty, the slice components - * chosen for the stripe must not on the same disks as any - * of the other volumes. - * - * If sufficient slice components can be found, the stripe - * is assembled and returned. - */ -int -populate_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - uint16_t ncomp, - dlist_t *othervols, - devconfig_t **stripe) -{ - uint16_t npaths = 0; - uint16_t ncomps = 0; /* number of components found */ - uint64_t rsize = 0; /* reqd component size */ - - dlist_t *other_hbas = NULL; - dlist_t *other_disks = NULL; - - dlist_t *slices = NULL; - dlist_t *comps = NULL; - - int error = 0; - - *stripe = NULL; - - ((error = disks_get_avail_slices(request, disks, &slices)) != 0) || - (error = get_volume_npaths(request, &npaths)); - if (error != 0) { - return (error); - } - - print_populate_volume_ncomps_msg( - devconfig_type_to_str(TYPE_STRIPE), nbytes, ncomp); - - if (slices == NULL) { - print_populate_no_slices_msg(); - return (0); - } - - /* determine HBAs and disks used by othervols */ - error = get_hbas_and_disks_used_by_volumes(othervols, - &other_hbas, &other_disks); - if (error != 0) { - dlist_free_items(other_hbas, NULL); - dlist_free_items(other_disks, NULL); - return (error); - } - - print_populate_choose_slices_msg(); - - /* - * each stripe component needs to be this size. - * Note that the stripe interlace doesn't need to be - * taken into account in this computation because any - * slice selected as a stripe component will be oversized - * to account for interlace and cylinder rounding done - * by libmeta. - */ - rsize = nbytes / ncomp; - - /* - * need to select 'ncomp' slices that are at least 'rsize' - * large in order to reach the desired capacity. - */ - ncomps = 0; - while ((ncomps < ncomp) && (error == 0)) { - - devconfig_t *comp = NULL; - dlist_t *item = NULL; - dlist_t *rmvd = NULL; - char *cname = NULL; - - /* BEGIN CSTYLED */ - /* - * 1st B_TRUE: require a different disk than those used by - * comps and othervols - * 2nd B_TRUE: requested size is minimum acceptable - * 3rd B_TRUE: add an extra cylinder to the resulting slice, this is - * necessary for Stripe components whose sizes get rounded - * down to an interlace multiple and then down to a cylinder - * boundary. - */ - /* END CSTYLED */ - error = choose_slice(rsize, npaths, slices, comps, - other_hbas, other_disks, B_TRUE, B_TRUE, B_TRUE, &comp); - - if ((error == 0) && (comp != NULL)) { - - ++ncomps; - - item = dlist_new_item(comp); - if (item == NULL) { - error = ENOMEM; - } else { - - /* add selected component to comp list */ - comps = dlist_insert_ordered( - item, - comps, - ASCENDING, - compare_devconfig_sizes); - - /* remove it from the available list */ - slices = dlist_remove_equivalent_item(slices, (void *) comp, - compare_devconfig_and_descriptor_names, &rmvd); - - if (rmvd != NULL) { - free(rmvd); - } - - /* add the component slice to the used list */ - if ((error = devconfig_get_name(comp, &cname)) == 0) { - error = add_used_slice_by_name(cname); - } - } - } else if (comp == NULL) { - /* no possible slice */ - break; - } - } - - dlist_free_items(slices, NULL); - dlist_free_items(other_hbas, NULL); - dlist_free_items(other_disks, NULL); - - if (ncomps == ncomp) { - - if ((error = assemble_stripe(request, comps, stripe)) == 0) { - print_populate_success_msg(); - } else { - dlist_free_items(comps, free_devconfig_object); - } - - } else if (error == 0) { - - if (ncomps > 0) { - print_insufficient_components_msg(ncomps); - dlist_free_items(comps, free_devconfig_object); - } else { - print_populate_no_slices_msg(); - } - - } - return (error); -} - -/* - * FUNCTION: populate_explicit_stripe(devconfig_t *request, - * dlist_t **results) - * - * INPUT: request - pointer to a request devconfig_t - * - * OUTPUT: results - pointer to a list of volume devconfig_t results - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Processes the input stripe request that specifies explicit - * slice components. - * - * The components have already been validated and reserved, - * all that is required is to create devconfig_t structs - * for each requested slice. - * - * The net size of the stripe is determined by the slice - * components. - * - * The stripe devconfig_t is assembled and appended to the - * results list. - * - * This function is also called from - * layout_mirror.populate_explicit_mirror() - */ -int -populate_explicit_stripe( - devconfig_t *request, - dlist_t **results) -{ - devconfig_t *stripe = NULL; - int error = 0; - - dlist_t *comps = NULL; - dlist_t *iter = NULL; - dlist_t *item = NULL; - - print_layout_explicit_msg(devconfig_type_to_str(TYPE_STRIPE)); - - /* assemble components */ - iter = devconfig_get_components(request); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - devconfig_t *rqst = (devconfig_t *)iter->obj; - dm_descriptor_t rqst_slice = NULL; - char *rqst_name = NULL; - devconfig_t *comp = NULL; - - /* slice components have been validated */ - /* turn each into a devconfig_t */ - ((error = devconfig_get_name(rqst, &rqst_name)) != 0) || - (error = slice_get_by_name(rqst_name, &rqst_slice)) || - (error = create_devconfig_for_slice(rqst_slice, &comp)); - - if (error == 0) { - - print_layout_explicit_added_msg(rqst_name); - - item = dlist_new_item((void *)comp); - if (item == NULL) { - error = ENOMEM; - } else { - comps = dlist_append(item, comps, AT_TAIL); - } - } - } - - if (error == 0) { - error = assemble_stripe(request, comps, &stripe); - } - - if (error == 0) { - if ((item = dlist_new_item(stripe)) == NULL) { - error = ENOMEM; - } else { - *results = dlist_append(item, *results, AT_TAIL); - print_populate_success_msg(); - } - } else { - dlist_free_items(comps, free_devconfig); - } - - return (error); -} - -/* - * FUNCTION: compose_stripe(devconfig_t *request, uint64_t nbytes, - * dlist_t *disks, uint16_t max, uint16_t min, - * dlist_t *othervols, devconfig_t **stripe) - * - * INPUT: request - pointer to a request devconfig_t - * nbytes - desired stripe size - * disks - pointer to a list of availalb disks - * max - maximum number of components allowed - * min - minimum number of components allowed - * othervols - pointer to a list of other volumes whose - * composition may affect this stripe - * (e.g., submirrors of the same mirror) - * - * OUTPUT: stripe - pointer to a devconfig_t to hold resulting stripe - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Attempt to compose a stripe of capacity nbytes, with - * component slices chosen from the input list of disks. - * The number of components in the stripe should be in the - * range min <= N <= max, more components are preferred. - * - * If a stripe can be composed, a pointer to it will be - * returned in the stripe devconfig_t. - * - * This is a loop wrapped around populate_stripe which - * varies the number of components between 'max' and 'min'. - */ -static int -compose_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - int max, - int min, - dlist_t *othervols, - devconfig_t **stripe) -{ - int error = 0; - - *stripe = NULL; - - for (; (error == 0) && (*stripe == NULL) && (max >= min); max--) { - error = populate_stripe( - request, nbytes, disks, max, othervols, stripe); - } - - return (error); -} - -/* - * FUNCTION: compose_stripe_within_hba(devconfig_t *request, - * dlist_t *hbas, uint64_t nbytes, - * int maxcomp, int mincomp, dlist_t **stripe) - * - * INPUT: request - pointer to a devconfig_t of the current request - * hbas - pointer to a list of available HBAs - * nbytes - the desired capacity for the stripe - * maxcomp - the maximum number of stripe components - * mincomp - the minimum number of stripe components - * - * OUTPUT: stripe - pointer to a stripe devconfig_t result - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Layout function which compose a stripe of the desired size - * using available disks within any single HBA from the input list. - * - * The number of components within the composed stripe will be - * in the range of min to max, preferring more components - * over fewer. - * - * All input HBAs are expected to have at least mincomp - * available disks and total space sufficient for the stripe. - * - * If the stripe can be composed, a pointer to it is returned in - * the stripe devconfig_t *. - * - * - * while (more hbas and stripe not composed) { - * select HBA - * if (not enough available space on this HBA) { - * continue; - * } - * get available disks for HBA - * use # disks as max # of stripe components - * try to compose stripe - * } - * - */ -static int -compose_stripe_within_hba( - devconfig_t *request, - dlist_t *hbas, - uint64_t nbytes, - uint16_t min, - uint16_t max, - devconfig_t **stripe) -{ - int error = 0; - - dlist_t *iter = NULL; - - *stripe = NULL; - - for (iter = hbas; - (iter != NULL) && (error == 0) && (*stripe == NULL); - iter = iter->next) { - - dm_descriptor_t hba = (uintptr_t)iter->obj; - dlist_t *disks = NULL; - uint64_t space = 0; - uint16_t ncomp = 0; - char *name; - - ((error = get_display_name(hba, &name)) != 0) || - (error = hba_get_avail_disks_and_space(request, - hba, &disks, &space)); - - if (error == 0) { - if (space >= nbytes) { - ncomp = dlist_length(disks); - ncomp = ((ncomp > max) ? max : ncomp); - error = compose_stripe( - request, nbytes, disks, ncomp, - min, NULL, stripe); - } else { - print_hba_insufficient_space_msg(name, space); - } - } - - dlist_free_items(disks, NULL); - } - - return (error); -} - -/* - * FUNCTION: assemble_stripe(devconfig_t *request, dlist_t *comps, - * devconfig_t **stripe) - * - * INPUT: request - pointer to a devconfig_t of the current request - * comps - pointer to a list of slice components - * - * OUPUT: stripe - pointer to a devconfig_t to hold final stripe - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which creates and populates a stripe devconfig_t - * struct using information from the input request and the - * list of slice components. - * - * Determines the name of the stripe either from the request - * or from the default naming scheme. - * - * Sets the interlace for the stripe if a value is specified - * in the request. - * - * Attaches the input list of components to the devconfig. - */ -static int -assemble_stripe( - devconfig_t *request, - dlist_t *comps, - devconfig_t **stripe) -{ - uint64_t ilace = 0; - char *name = NULL; - int error = 0; - - if ((error = new_devconfig(stripe, TYPE_STRIPE)) == 0) { - /* set stripe name, use requested name if specified */ - if ((error = devconfig_get_name(request, &name)) != 0) { - if (error != ERR_ATTR_UNSET) { - volume_set_error(gettext("error getting requested name\n")); - } else { - error = 0; - } - } - - if (error == 0) { - if (name == NULL) { - if ((error = get_next_volume_name(&name, - TYPE_STRIPE)) == 0) { - error = devconfig_set_name(*stripe, name); - free(name); - } - } else { - error = devconfig_set_name(*stripe, name); - } - } - } - - if (error == 0) { - if ((error = get_stripe_interlace(request, &ilace)) == 0) { - error = devconfig_set_stripe_interlace(*stripe, ilace); - } else if (error == ENOENT) { - ilace = get_default_stripe_interlace(); - error = 0; - } - } - - if (error == 0) { - uint64_t nbytes = 0; - if ((error = compute_usable_stripe_capacity(comps, - ilace, &nbytes)) == 0) { - error = devconfig_set_size_in_blocks(*stripe, nbytes/DEV_BSIZE); - } - } - - if (error == 0) { - comps = order_stripe_components_alternate_hbas(comps); - devconfig_set_components(*stripe, comps); - } else { - free_devconfig(*stripe); - *stripe = NULL; - } - - return (error); -} - -/* - * Order the given stripe component list such that the number of - * slices on the same hba adjacent to each other in the list are - * minimized. - * - * @param comps - * the slice component list to order - * - * @return the first element of the resulting list - */ -static dlist_t * -order_stripe_components_alternate_hbas( - dlist_t *comps) -{ - dlist_t *iter; - - oprintf(OUTPUT_DEBUG, - gettext("Stripe components before ordering to alternate HBAs:\n")); - - for (iter = comps; iter != NULL; iter = iter->next) { - devconfig_t *slice = (devconfig_t *)(iter->obj); - char *name; - devconfig_get_name(slice, &name); - oprintf(OUTPUT_DEBUG, " %s\n", name); - } - - return (dlist_separate_similar_elements( - comps, compare_slices_on_same_hba)); -} - -/* - * FUNCTION: compute_usable_stripe_capacity(dlist_t *comps, uint64_t ilace, - * uint64_t *nbytes) - * - * INPUT: comps - pointer to a list of stripe components - * ilace - the expected stripe interlace in bytes - * - * OUPUT: nbytes - pointer to hold the computed capacity - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which computes the usable size of a stripe taking - * into account the interlace and cylinder rounding that - * libmeta uses: a stripe component's size is rounded down to - * an integral multiple of the interlace and then rounded down - * to a cylinder boundary on VTOC labeled disks. - * - * (These libmeta computations are in the meta_stripe_attach() - * function of .../lib/lvm/libmeta/common/meta_stripe.c and - * meta_adjust_geom() in .../lib/lvm/libmeta/common/meta_init.c) - * - * This function's implementation iterates the input list of - * stripe component slices and determines the smallest usable - * component capacity. - * - * The usable stripe capacity is then that component capacity - * times the number of components. - */ -static int -compute_usable_stripe_capacity( - dlist_t *comps, - uint64_t ilace, - uint64_t *nbytes) -{ - uint64_t bytes_per_component = 0; - dlist_t *iter; - int ncomps = 0; - int error = 0; - - for (iter = comps; (iter != NULL) && (error == 0); iter = iter->next) { - - devconfig_t *comp = (devconfig_t *)iter->obj; - char *comp_name = NULL; - uint64_t comp_nbytes = 0; - dm_descriptor_t comp_disk; - boolean_t comp_disk_efi = B_FALSE; - uint64_t comp_disk_bps = 0; /* disk bytes per sector */ - - ((error = devconfig_get_size(comp, &comp_nbytes)) != 0) || - (error = devconfig_get_name(comp, &comp_name)) || - (error = get_disk_for_named_slice(comp_name, &comp_disk)) || - (error = disk_get_blocksize(comp_disk, &comp_disk_bps)) || - (error = disk_get_is_efi(comp_disk, &comp_disk_efi)); - if (error == 0) { - - if (comp_disk_efi == B_FALSE) { - uint64_t nhead = 0; - uint64_t nsect = 0; - uint64_t ncyls = 0; - - /* do cylinder and interlace rounding for non-EFI disks */ - ((error = disk_get_ncylinders(comp_disk, &ncyls)) != 0) || - (error = disk_get_nheads(comp_disk, &nhead)) || - (error = disk_get_nsectors(comp_disk, &nsect)); - if (error == 0) { - /* compute bytes per cyl */ - uint64_t bpc = nhead * nsect * comp_disk_bps; - - /* round nbytes down to a multiple of interlace */ - comp_nbytes = (comp_nbytes / ilace) * ilace; - - /* round nbytes down to a cylinder boundary */ - comp_nbytes = (comp_nbytes / bpc) * bpc; - } - } - - /* save smallest component size */ - if ((bytes_per_component == 0) || - (comp_nbytes < bytes_per_component)) { - bytes_per_component = comp_nbytes; - } - - ++ncomps; - } - } - - if (error == 0) { - /* size of stripe = smallest component size * n components */ - *nbytes = (bytes_per_component * ncomps); - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_stripe.h b/usr/src/cmd/lvm/metassist/layout/layout_stripe.h deleted file mode 100644 index b41c9c3ecf6b..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_stripe.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_STRIPE_H -#define _LAYOUT_STRIPE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" -#include "volume_dlist.h" - -extern int layout_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t **results); - -extern int populate_stripe( - devconfig_t *request, - uint64_t nbytes, - dlist_t *disks, - uint16_t ncomps, - dlist_t *othervols, - devconfig_t **stripe); - -extern int populate_explicit_stripe( - devconfig_t *request, - dlist_t **results); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_STRIPE_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_svm_util.c b/usr/src/cmd/lvm/metassist/layout/layout_svm_util.c deleted file mode 100644 index d4fcaa74eedc..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_svm_util.c +++ /dev/null @@ -1,2091 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "volume_dlist.h" -#include "volume_error.h" -#include "volume_output.h" - -#include "layout_device_util.h" -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_request.h" -#include "layout_svm_util.h" - -static int _max_hsps = 1000; /* # of HSPs (arbitrary limit) */ -static int _max_devs = 8192; /* # of SVM volumes allowed */ -static int _max_devs_cfg = 128; /* # of SVM volumes configured */ -static int _max_sets = 4; /* # of SVM disk sets */ - -/* volume name prefixes for generating new names */ -static const char *_hsp_prefix = "hsp"; -static const char *_dev_prefix = "d"; - -/* - * dynamically allocated arrays to track used HSP (hspXXX) and volume - * names (dXXX) by number - */ -static boolean_t *hsps_by_number = NULL; -static boolean_t *devs_by_number = NULL; - -/* - * This struct remembers a diskset and the names of - * the disks in the set - */ -typedef struct { - char *name; - dlist_t *disknames; - dlist_t *hsps; -} diskset_t; - -/* - * list of diskset_t for known disksets - */ -static dlist_t *_disksets = NULL; - -static int add_diskset( - char *diskset); - -static int add_diskset_diskname( - char *diskset, - char *diskname); - -static int add_diskset_hsp( - char *diskset, - char *hspname); - -static int add_diskset_hsp_spare( - char *diskset, - char *hspname, - char *spare); - -static int is_disk_in_local_diskset( - dm_descriptor_t disk, - boolean_t *bool); - -static int is_disk_in_named_diskset( - dm_descriptor_t disk, - char *dsname, - boolean_t *bool); - -/* SVM snapshot stuff */ -typedef enum { - SVM_DISKSET = 0, - SVM_MDB, - SVM_STRIPE, - SVM_MIRROR, - SVM_RAID, - SVM_TRANS, - SVM_SP, - SVM_HSP, - SVM_HS, - SVM_DRIVE -} svm_type_t; - -typedef struct svm_snap_entry { - struct svm_snap_entry *next; - char *diskset; - svm_type_t type; - char *name; - char *slice; -} svm_snap_t; - -static svm_snap_t *svm_snapshot(int *errp); -static void free_svm_snapshot(svm_snap_t *listp); - -static char *type_name(svm_type_t type); -static int add_record( - svm_snap_t **listp, - char *setname, - svm_type_t type, - char *mname, - char *slice_name); -static int diskset_info(svm_snap_t **listp, mdsetname_t *sp); -static void free_names(mdnamelist_t *nlp); -static int load_svm(svm_snap_t **listp); -static int new_entry( - svm_snap_t **listp, - char *sname, - svm_type_t type, - char *mname, - mdsetname_t *sp); - -/* - * FUNCTION: scan_svm_names(char *diskset) - * - * INPUT: diskset - a char * disk set name - * - * PURPOSE: Take a snapshot of the current SVM config. - * - * Scan it and remember: - * 1. all known disk sets - * s. the disks in the named disk set - * 3. the used device and HSP names in the named disk set - * 4. the HSPs in the disk set - * 5. the spares in the HSPs - */ -int -scan_svm_names( - char *diskset) -{ - int ndisks = 0; - int nhsps = 0; - int ndevices = 0; - int nsets = 0; - - int number = 0; - int error = 0; - svm_snap_t *headp = NULL; - svm_snap_t *listp = NULL; - char *tablefmt = " %-20s %-10s %-20s %-10s\n"; - - oprintf(OUTPUT_TERSE, - gettext("\nScanning system SVM configuration...\n")); - - headp = svm_snapshot(&error); - if (error != 0) { - oprintf(OUTPUT_TERSE, - gettext("failed to scan SVM devices\n")); - return (error); - } - - if (error == 0) { - if ((error = get_max_number_of_devices(&_max_devs_cfg)) == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" configured maximum number of " - "volumes: %d\n"), - _max_devs_cfg); - } - } - - if (error == 0) { - if ((error = get_max_number_of_disksets(&_max_sets)) == 0) { - oprintf(OUTPUT_VERBOSE, - gettext(" configured maximum number of " - "disk sets: %d\n"), - _max_sets); - } - } - - if (error == 0) { - /* array is realloc'ed as necessary */ - if ((hsps_by_number = - (boolean_t *)calloc(_max_hsps, sizeof (boolean_t))) == NULL) { - oprintf(OUTPUT_TERSE, - gettext("failed to allocate HSP name array\n")); - error = ENOMEM; - } - } - - if (error == 0) { - /* array is realloc'ed as necessary */ - if ((devs_by_number = - (boolean_t *)calloc(_max_devs, sizeof (boolean_t))) == NULL) { - oprintf(OUTPUT_TERSE, - gettext("failed to allocate volume name array\n")); - error = ENOMEM; - } - } - - if ((error == 0) && (get_max_verbosity() >= OUTPUT_DEBUG)) { - (void) oprintf(OUTPUT_DEBUG, "\n"); - (void) oprintf(OUTPUT_DEBUG, - tablefmt, - gettext("disk set"), - gettext("dev type"), - gettext("name"), - gettext("slice")); - (void) oprintf(OUTPUT_DEBUG, - " -----------------------------------" - "-----------------------------------\n"); - } - - for (listp = headp; listp != NULL && error == 0; listp = listp->next) { - - oprintf(OUTPUT_DEBUG, - tablefmt, - listp->diskset, - type_name(listp->type), - listp->name, - listp->slice); - - switch (listp->type) { - case SVM_DISKSET: - - error = add_diskset(listp->name); - ++nsets; - break; - - case SVM_DRIVE: - - error = add_diskset_diskname(listp->diskset, listp->name); - - /* is this drive in the requested diskset? */ - if (string_case_compare(diskset, listp->diskset) == 0) { - ++ndisks; - } - break; - - case SVM_MIRROR: - case SVM_RAID: - case SVM_TRANS: - case SVM_SP: - case SVM_STRIPE: - - /* is this SVM volume in the requested diskset? */ - if (string_case_compare(diskset, listp->diskset) == 0) { - - /* isolate device name from "poolname/dXXXX" */ - char *cp = strrchr(listp->name, '/'); - if (cp != NULL) { - ++cp; - } else { - cp = listp->name; - } - - /* BEGIN CSTYLED */ - /* - * names for requested devices and HSPs are remembered - * so that the default name generation scheme knows - * which names are already being used - */ - /* END CSTYLED */ - /* extract device number from name "dXXXX" */ - if (sscanf(cp, "d%d", &number) != EOF) { - oprintf(OUTPUT_DEBUG, - gettext(" device: %6s number: %3d\n"), - cp, number); - - if (number > _max_devs) { - /* hit current limit, expand it */ - boolean_t *tmp = - (boolean_t *)realloc((void *)_max_devs, - (number * sizeof (boolean_t))); - - if (tmp == NULL) { - error = ENOMEM; - } else { - _max_devs = number; - devs_by_number = tmp; - } - } - - if ((error == 0) && - (devs_by_number[number] == B_FALSE)) { - devs_by_number[number] = B_TRUE; - ++ndevices; - } - } - } - break; - - case SVM_HSP: - - /* is this HSP in the requested diskset? */ - if (string_case_compare(diskset, listp->diskset) == 0) { - - /* isolate HSP name from "poolname/hspXXX" */ - char *cp = strrchr(listp->name, '/'); - if (cp != NULL) { - ++cp; - } else { - cp = listp->name; - } - - /* extract pool number from name "hspXXX" */ - if (sscanf(cp, "hsp%03d", &number) != EOF) { - oprintf(OUTPUT_DEBUG, - gettext(" HSP: %6s number: %3d\n"), - cp, number); - - if (number > _max_hsps) { - /* hit our arbitrary limit, double it */ - boolean_t *tmp = - (boolean_t *)realloc((void *)hsps_by_number, - 2 * _max_hsps * sizeof (boolean_t)); - - if (tmp != NULL) { - _max_hsps *= 2; - hsps_by_number = tmp; - } else { - error = ENOMEM; - } - } - - if ((error == 0) && - (hsps_by_number[number] == B_FALSE)) { - hsps_by_number[number] = B_TRUE; - error = add_diskset_hsp(diskset, cp); - ++nhsps; - } - } - } - - break; - - case SVM_HS: - - /* is this hot spare in the requested disk set? */ - if (string_case_compare(diskset, listp->diskset) == 0) { - - /* isolate HSP name from "poolname/hspXXXX" */ - char *cp = strrchr(listp->name, '/'); - if (cp != NULL) { - ++cp; - } else { - cp = listp->name; - } - - error = add_diskset_hsp_spare(diskset, cp, listp->slice); - } - break; - - case SVM_MDB: - default: - break; - } - } - - free_svm_snapshot(headp); - - if (error == 0) { - /* available diskset? subtract 1 for the local set */ - if ((diskset_exists(diskset) != B_TRUE) && - (nsets >= _max_sets)) { - volume_set_error( - gettext("Disk set \"%s\" cannot be created, the " - "maximum number of disk sets (%d) already " - "exists.\n"), - diskset, _max_sets); - error = -1; - } - } - - if (error == 0) { - oprintf(OUTPUT_VERBOSE, - gettext("\n Disk set \"%s\" has:\n\n"), diskset); - oprintf(OUTPUT_VERBOSE, - gettext(" %d drives\n"), ndisks); - oprintf(OUTPUT_VERBOSE, - gettext(" %d volumes\n"), ndevices); - oprintf(OUTPUT_VERBOSE, - gettext(" %d HSPs\n"), nhsps); - } else { - free(hsps_by_number); - free(devs_by_number); - hsps_by_number = (boolean_t *)NULL; - devs_by_number = (boolean_t *)NULL; - } - - return (error); -} - -/* - * FUNCTION: release_svm_names() - * - * PURPOSE: Release snapshot of the current SVM config. - * - * Free memory allocated by scan_svm_names() - */ -void -release_svm_names() -{ - dlist_t *iter; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - diskset_t *diskset = (diskset_t *)iter->obj; - dlist_free_items(diskset->disknames, free); - dlist_free_items(diskset->hsps, free_devconfig); - free(diskset->name); - } - dlist_free_items(_disksets, free); - _disksets = NULL; - - if (hsps_by_number != NULL) - free(hsps_by_number); - if (devs_by_number != NULL) - free(devs_by_number); - - hsps_by_number = (boolean_t *)NULL; - devs_by_number = (boolean_t *)NULL; -} - -/* - * FUNCTION: diskset_exists(char *diskset) - * - * INPUT: dsname - a char * diskset name - * - * RETURNS: boolean_t - B_TRUE if the named diskset exists - * B_FALSE otherwise - * - * PURPOSE: Checks the list of known disk sets and determines - * if the input name is in that list. - */ -boolean_t -diskset_exists( - char *dsname) -{ - dlist_t *iter; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - diskset_t *diskset = (diskset_t *)iter->obj; - if (string_case_compare(dsname, diskset->name) == 0) { - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* - * FUNCTION: add_diskset(char *dsname) - * - * INPUT: dsname - a char * disk set name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Add the named disk set to the list of known disk sets. - */ -static int -add_diskset( - char *dsname) -{ - dlist_t *iter; - int error = 0; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - diskset_t *diskset = (diskset_t *)iter->obj; - if (string_case_compare(diskset->name, dsname) == 0) { - break; - } - } - - if (iter == NULL) { - - dlist_t *item = NULL; - diskset_t *diskset = (diskset_t *)calloc(1, sizeof (diskset_t)); - - if (diskset == NULL) { - error = ENOMEM; - } else { - diskset->hsps = NULL; - diskset->name = strdup(dsname); - if (diskset->name == NULL) { - free(diskset); - error = ENOMEM; - } else { - if ((item = dlist_new_item(diskset)) == NULL) { - free(diskset->name); - free(diskset); - error = ENOMEM; - } else { - _disksets = dlist_append(item, _disksets, AT_HEAD); - oprintf(OUTPUT_DEBUG, - gettext(" added disk set %s \n"), dsname); - } - } - } - } - - return (error); -} - -/* - * FUNCTION: add_diskset_diskname(char *diskset, char *diskname) - * - * INPUT: dsname - a char * disk set name - * diskname - a char * disk name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Add the disk name to the named disk set's list of disks. - * - * The input diskname is fully qualified with the path - * to the raw disk device (/dev/rdsk/cXtXdXsX) which is - * not relevant, so it is removed. - */ -static int -add_diskset_diskname( - char *dsname, - char *diskname) -{ - dlist_t *iter; - int error = 0; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - if (string_case_compare(diskset->name, dsname) == 0) { - - dlist_t *item = NULL; - char *name = NULL; - char *cp = NULL; - - /* trim leading path */ - if ((cp = strrchr(diskname, '/')) != 0) { - if ((name = strdup(cp+1)) == NULL) { - error = ENOMEM; - } - } else if ((name = strdup(diskname)) == NULL) { - error = ENOMEM; - } - - if ((item = dlist_new_item(name)) == NULL) { - free(name); - error = ENOMEM; - } else { - diskset->disknames = - dlist_append(item, diskset->disknames, AT_HEAD); - } - - break; - } - } - - if ((error == 0) && (iter == NULL)) { - /* new disk set */ - if ((error = add_diskset(dsname)) == 0) { - return (add_diskset_diskname(dsname, diskname)); - } - } - - return (error); -} - -/* - * FUNCTION: add_diskset_hsp(char *dsname, char *hspname) - * - * INPUT: dsname - a char * disk set name - * hspname - a char * HSP name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Model a new HSP for the named disk set. - * - * Metassist can use existing HSPs to service new volumes. - * - * It is necessary to have a model of what HSPs currently - * exist for each disk set. - * - * This function takes information found during discovery - * and turns it into a form usable by the HSP layout code. - */ -static int -add_diskset_hsp( - char *dsname, - char *hspname) -{ - dlist_t *iter; - int error = 0; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - - if (string_case_compare(diskset->name, dsname) == 0) { - - dlist_t *item = NULL; - devconfig_t *hsp = NULL; - - if (((error = new_devconfig(&hsp, TYPE_HSP)) != 0) || - (error = devconfig_set_name(hsp, hspname))) { - free_devconfig(hsp); - } else { - if ((item = dlist_new_item(hsp)) == NULL) { - free_devconfig(hsp); - error = ENOMEM; - } else { - diskset->hsps = - dlist_append(item, diskset->hsps, AT_TAIL); - - oprintf(OUTPUT_DEBUG, - gettext(" added %s to disk set %s\n"), - hspname, dsname); - } - } - break; - } - } - - if ((error == 0) && (iter == NULL)) { - if ((error = add_diskset(dsname)) == 0) { - return (add_diskset_hsp(dsname, hspname)); - } - } - - return (error); -} - -/* - * FUNCTION: add_diskset_hsp_spare(char *dsname, char *hspname, - * char *sparename) - * - * INPUT: dsname - a char * diskset name - * hspname - a char * HSP name - * sparename - a char * hot spare (slice) name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Locate the named hot spare pool in the named disk set and - * add the named spare slice to its list of spares. - * - * Metassist can use existing HSPs to service new volumes. - * - * It is necessary to have a model of what HSPs currently - * exist for each disk set. - * - * This function takes information found during discovery - * and turns it into a form usable by the HSP layout code. - */ -static int -add_diskset_hsp_spare( - char *dsname, - char *hspname, - char *sparename) -{ - dlist_t *iter; - int error = 0; - - for (iter = _disksets; iter != NULL; iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - - if (string_case_compare(diskset->name, dsname) == 0) { - - dlist_t *item = - dlist_find( - diskset->hsps, hspname, - compare_string_to_devconfig_name); - - if (item != NULL) { - - /* add spare to HSP */ - devconfig_t *hsp = (devconfig_t *)item->obj; - dm_descriptor_t slice = (dm_descriptor_t)0; - - (void) slice_get_by_name(sparename, &slice); - if (slice == (dm_descriptor_t)0) { - oprintf(OUTPUT_TERSE, - gettext("warning: ignoring nonexistent " - "slice %s defined in %s\n"), - sparename, hspname); - } else { - - uint64_t nbytes = 0; - uint32_t index = 0; - devconfig_t *spare = NULL; - - /* build a devconfig_t model of the slice */ - if (((error = slice_get_size(slice, &nbytes)) != 0) || - (error = slice_get_index(slice, &index)) || - (error = new_devconfig(&spare, TYPE_SLICE)) || - (error = devconfig_set_name(spare, sparename)) || - (error = devconfig_set_size(spare, nbytes)) || - (error = devconfig_set_slice_index(spare, index))) { - free_devconfig(spare); - } else { - - if ((item = dlist_new_item(spare)) == NULL) { - error = ENOMEM; - free_devconfig(spare); - } else { - dlist_t *spares; - spares = devconfig_get_components(hsp); - spares = dlist_append(item, spares, AT_TAIL); - devconfig_set_components(hsp, spares); - - oprintf(OUTPUT_DEBUG, - gettext(" added %s to %s in " - "disk set %s\n"), - sparename, hspname, dsname); - } - } - } - - break; - - } else { - if ((error = add_diskset_hsp(dsname, hspname)) == 0) { - return (add_diskset_hsp_spare( - dsname, hspname, sparename)); - } - } - } - } - - return (error); -} - -/* - * Return a list of disks in the given diskset. - * - * @param dsname - * The name of the named disk set, or "" for the local - * set. - * - * @param disks - * RETURN: pointer to the list of disks in the given disk - * set - * - * @return 0 if succesful, non-zero otherwise - */ -int -get_disks_in_diskset( - char *dsname, - dlist_t **disks) -{ - dlist_t *known_disks; - int error = 0; - - *disks = NULL; - - if ((error = get_known_disks(&known_disks)) == 0) { - dlist_t *iter; - - /* For each known disk... */ - for (iter = known_disks; - iter != NULL && error == 0; - iter = iter->next) { - dm_descriptor_t disk = (uintptr_t)iter->obj; - boolean_t in_diskset = B_FALSE; - - /* If this disk is in the given set... */ - error = is_disk_in_diskset(disk, dsname, &in_diskset); - if (error == 0 && in_diskset == B_TRUE) { - dlist_t *item = dlist_new_item((void *)(uintptr_t)disk); - *disks = dlist_append(item, *disks, AT_TAIL); - } - } - } - - return (error); -} - -/* - * FUNCTION: is_disk_in_diskset(dm_descriptor_t disk, char *dsname, - * boolean_t *bool) - * - * INPUT: disk - dm_descriptor_t disk handle - * dsname - char * diskset name, or MD_LOCAL_NAME for - * the local set. - * - * OUTPUT: bool - pointer to a boolean_t to hold the result - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determine if the input disk is known to be in the - * given diskset. - */ -int -is_disk_in_diskset( - dm_descriptor_t disk, - char *dsname, - boolean_t *bool) -{ - if (string_case_compare(dsname, MD_LOCAL_NAME) == 0) { - return (is_disk_in_local_diskset(disk, bool)); - } - - return (is_disk_in_named_diskset(disk, dsname, bool)); -} - -static int -is_disk_in_local_diskset( - dm_descriptor_t disk, - boolean_t *bool) -{ - dlist_t *iter; - dlist_t *aliases = NULL; - boolean_t in_named_diskset = B_FALSE; - char *name = NULL; - int error = 0; - - *bool = B_FALSE; - - error = get_display_name(disk, &name); - if (error == 0) { - - error = get_aliases(disk, &aliases); - if (error == 0) { - - /* For each known disk set... */ - for (iter = _disksets; - iter != NULL && in_named_diskset == B_FALSE; - iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - dlist_t *names = diskset->disknames; - - /* Check disk name */ - in_named_diskset = dlist_contains( - names, name, compare_device_names); - - /* Check disk aliases */ - if (in_named_diskset == B_FALSE) { - dlist_t *iter2; - for (iter2 = aliases; - iter2 != NULL && in_named_diskset == B_FALSE; - iter2 = iter2->next) { - in_named_diskset = dlist_contains(names, - (char *)iter2->obj, compare_device_names); - } - } - } - } - } - - if (error == 0) { - *bool = (in_named_diskset == B_TRUE ? B_FALSE : B_TRUE); - } - - return (error); -} - -static int -is_disk_in_named_diskset( - dm_descriptor_t disk, - char *dsname, - boolean_t *bool) -{ - dlist_t *iter; - int error = 0; - boolean_t in_diskset = B_FALSE; - - *bool = B_FALSE; - - for (iter = _disksets; - (iter != NULL) && (in_diskset == B_FALSE); - iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - - if (string_case_compare(diskset->name, dsname) == 0) { - - dlist_t *names = diskset->disknames; - dlist_t *aliases = NULL; - char *name = NULL; - - ((error = get_display_name(disk, &name)) != 0) || - (error = get_aliases(disk, &aliases)); - if (error != 0) { - break; - } - - /* check disk name */ - in_diskset = dlist_contains(names, name, compare_device_names); - - /* check disk aliases */ - if (in_diskset == B_FALSE) { - dlist_t *iter2; - for (iter2 = aliases; - (iter2 != NULL) && (in_diskset == B_FALSE); - iter2 = iter2->next) { - in_diskset = dlist_contains(names, - (char *)iter2->obj, compare_device_names); - } - } - } - } - - *bool = in_diskset; - - return (error); -} - -/* - * FUNCTION: is_disk_in_other_diskset(dm_descriptor_t disk, char *dsname, - * boolean_t *bool) - * - * INPUT: disk - dm_descriptor_t disk handle - * dsname - char * disk set name - * - * OUTPUT: bool - pointer to a boolean_t to hold the result. - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determine if the named disk is known to be in a disk set - * other than the named disk set. - */ -int -is_disk_in_other_diskset( - dm_descriptor_t disk, - char *dsname, - boolean_t *bool) -{ - boolean_t in_other = B_FALSE; - dlist_t *iter; - dlist_t *aliases = NULL; - char *name = NULL; - char *cp = NULL; - int error = 0; - - ((error = get_display_name(disk, &name)) != 0) || - (error = get_aliases(disk, &aliases)); - if (error != 0) { - return (error); - } - - /* - * discard the leading path, it is probably /dev/dsk - * and the disk set disk names are all /dev/rdsk/... - * - * aliases do not have leading paths - */ - cp = strrchr(name, '/'); - if (cp != NULL) { - ++cp; - } else { - cp = name; - } - name = cp; - - for (iter = _disksets; - (iter != NULL) && (in_other == B_FALSE); - iter = iter->next) { - - diskset_t *diskset = (diskset_t *)iter->obj; - dlist_t *names = diskset->disknames; - - if (string_case_compare(diskset->name, dsname) == 0) { - /* skip named disk set */ - continue; - } - - /* see if disk's name is in disk set's name list */ - in_other = dlist_contains(names, name, compare_device_names); - - /* see if any of the disk's aliases is in name list */ - if (in_other == B_FALSE) { - dlist_t *iter2; - for (iter2 = aliases; - (iter2 != NULL) && (in_other == B_FALSE); - iter2 = iter2->next) { - - in_other = dlist_contains(names, - (char *)iter2->obj, compare_device_names); - } - } - } - - *bool = in_other; - - return (error); -} - -/* - * FUNCTION: hsp_get_default_for_diskset(char *diskset, - * devconfig_t **hsp) - * - * INPUT: diskset - char * disk set name - * - * RETURNS: devconfig_t * - pointer to the first HSP in the disk set - * NULL if none found - * - * PURPOSE: Locate the first HSP in the named disk set. - */ -int -hsp_get_default_for_diskset( - char *diskset, - devconfig_t **hsp) -{ - dlist_t *iter = _disksets; - - *hsp = NULL; - - for (; (iter != NULL) && (*hsp == NULL); iter = iter->next) { - diskset_t *set = (diskset_t *)iter->obj; - if (string_case_compare(set->name, diskset) == 0) { - dlist_t *item = set->hsps; - if (item != NULL) { - *hsp = item->obj; - } - } - } - - return (0); -} - -/* - * FUNCTION: get_n_metadb_replicas(int *nreplicas) - * - * OUTPUT: nreplicas - pointer to int to hold the result - * - * RETURNS: int - 0 on success - * !0 on failure - * - * PURPOSE: Check the number of replicas configured for the local set. - */ -int -get_n_metadb_replicas( - int *nreplicas) -{ - mdsetname_t *sp; - md_replicalist_t *rlp = NULL; - md_error_t mderror = mdnullerror; - int error = 0; - - *nreplicas = 0; - - sp = metasetname(MD_LOCAL_NAME, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - error = -1; - } else { - *nreplicas = metareplicalist(sp, MD_BASICNAME_OK, &rlp, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - error = -1; - } else if (rlp != NULL) { - metafreereplicalist(rlp); - rlp = NULL; - } - - if (*nreplicas < 0) { - *nreplicas = 0; - } - } - - return (error); -} - -/* - * FUNCTION: hsp_get_by_name(char *diskset, char *name, - * devconfig_t **hsp) - * - * INPUT: diskset - char * disk set name - * name - char * HSP name - * - * OUTPUT: hsp - a devconfig_t * - pointer to hold - * the named HSP if none found - * - * PURPOSE: Locate the named HSP in the named disk set. - */ -int -hsp_get_by_name( - char *diskset, - char *name, - devconfig_t **hsp) -{ - dlist_t *iter = _disksets; - - *hsp = NULL; - - for (; (iter != NULL) && (*hsp == NULL); iter = iter->next) { - diskset_t *set = (diskset_t *)iter->obj; - if (string_case_compare(set->name, diskset) == 0) { - dlist_t *item = dlist_find( - set->hsps, name, compare_string_to_devconfig_name); - if (item != NULL) { - *hsp = item->obj; - } - } - } - - return (0); -} - -/* - * FUNCTION: is_volume_name_valid(char *name) - * - * OUTPUT: name - pointer to a char * volume name - * - * RETURNS: boolean_t - B_TRUE if the input name is valid - * B_FALSE otherwise - * - * PURPOSE: Wrapper around libmeta volume name validation method. - */ -boolean_t -is_volume_name_valid( - char *name) -{ - return (is_metaname(name)); -} - -/* - * FUNCTION: is_hsp_name_valid(char *name) - * - * INPUT: name - char * HSP name - * - * RETURNS: boolean_t - B_TRUE if the input name is valid - * B_FALSE otherwise - * - * PURPOSE: Wrapper around libmeta HSP name validation method. - */ -boolean_t -is_hsp_name_valid( - char *name) -{ - return (is_hspname(name)); -} - -/* - * FUNCTION: extract_index(char *name, char *prefix, char *num_fmt, - * int *index) - * - * INPUT: name - const char * volume name - * prefix - const char * fixed part of format string - * num_fmt - const char * format of number to extract (e.g. %d) - * - * OUTPUT: index - pointer to int to hold numeric part of name - * - * RETURNS: boolean_t - B_TRUE if the input name is parsed correctly - * B_FALSE otherwise - * - * PURPOSE: Extract the numeric portion of a device name for use - * by higher-level functions. - */ -static boolean_t -extract_index( - const char *name, - const char *prefix, - const char *num_fmt, - int *index) -{ - char buf[MAXNAMELEN]; - const char *cp; - const char *fmt = buf; - - if ((cp = strrchr(name, '/')) != NULL) { - ++cp; - } else { - cp = name; - } - - (void) snprintf(buf, sizeof (buf), "%s%s", prefix, num_fmt); - if (sscanf(cp, fmt, index) == 1) - return (B_TRUE); - else - return (B_FALSE); -} - -/* - * FUNCTION: is_volume_name_in_range(char *name) - * - * INPUT: name - char * volume name - * - * RETURNS: boolean_t - B_TRUE if the input name is in the allowed - * range of names - * B_FALSE otherwise - * - * PURPOSE: Determine if the input volume name is within the allowed - * range of device names (0 <= n < max # of devices configured). - */ -boolean_t -is_volume_name_in_range( - char *name) -{ - int index = -1; - - if (extract_index(name, _dev_prefix, "%d", &index)) { - if (index >= 0 && index < _max_devs_cfg) { - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* - * FUNCTION: reserve_volume_name(char *name) - * - * INPUT: name - a char * volume name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Mark a volume name/number as used. - * - * Assumes that the input name has been validated. - * - * if the name is not currently available, return -1 - */ -int -reserve_volume_name( - char *name) -{ - int index = -1; - - if (extract_index(name, _dev_prefix, "%d", &index)) { - if (devs_by_number[index] != B_TRUE) { - devs_by_number[index] = B_TRUE; - return (0); - } - } - - return (-1); -} - -/* - * FUNCTION: reserve_hsp_name(char *name) - * - * INPUT: name - a char * hsp name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Mark a HSP name/number as used. - * - * Assumes that the input name has been validated. - * - * if the name is not currently available, return -1 - */ -int -reserve_hsp_name( - char *name) -{ - int index = -1; - - if (extract_index(name, _hsp_prefix, "%03d", &index)) { - if (hsps_by_number[index] != B_TRUE) { - hsps_by_number[index] = B_TRUE; - return (0); - } - } - - return (-1); -} - -/* - * FUNCTION: release_volume_name(char *name) - * - * INPUT: name - a char * volume name - * - * PURPOSE: release the input volume name. - * - * Extract volume number from the input name - * and use it to index into the array of used - * volume numbers. Make that volume number - * available for use again. - */ -void -release_volume_name( - char *name) -{ - int index = -1; - - if (name != NULL && extract_index(name, _dev_prefix, "%d", &index)) { - oprintf(OUTPUT_DEBUG, - gettext("released volume name %s%d\n"), - _dev_prefix, index); - devs_by_number[index] = B_FALSE; - } -} - -/* - * FUNCTION: release_hsp_name(char *name) - * - * INPUT: name - a char * HSP name - * - * PURPOSE: release the input HSP name. - * - * Extract volume number from the input name - * and use it to index into the array of used - * hsp numbers. Make that hsp number available - * for use again. - */ -void -release_hsp_name( - char *name) -{ - int index = -1; - - if (name != NULL && extract_index(name, _hsp_prefix, "%d", &index)) { - oprintf(OUTPUT_DEBUG, - gettext("released hsp name %s%d\n"), - _hsp_prefix, index); - hsps_by_number[index] = B_FALSE; - } -} - -/* - * FUNCTION: get_next_volume_name(char **name) - * - * OUTPUT: name - pointer to a char * to hold volume name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: generate a new volume name using the standard device - * name prefix and the lowest available device number. - * - * if type == MIRROR, determine the next available mirror - * name according to the convention that a mirror name is - * a multiple of 10. - * - * If such a name is unavailable, use the next available name. - */ -int -get_next_volume_name( - char **name, - component_type_t type) -{ - int next = 0; - - for (next = 0; next < _max_devs_cfg; ++next) { - if ((type == TYPE_MIRROR && ((next % 10) != 0)) || - (type != TYPE_MIRROR && ((next % 10) == 0))) { - /* use/save multiples of 10 for mirrors */ - continue; - } - if (devs_by_number[next] != B_TRUE) { - break; - } - } - - if ((next == _max_devs_cfg) && (type == TYPE_MIRROR)) { - /* try next sequentially available name */ - for (next = 0; next < _max_devs_cfg; ++next) { - if (devs_by_number[next] != B_TRUE) { - break; - } - } - } - - if (next == _max_devs_cfg) { - volume_set_error( - gettext("ran out of logical volume names.\n")); - return (-1); - } - - *name = (char *)calloc(MAXNAMELEN, sizeof (char)); - if (*name == NULL) { - return (ENOMEM); - } - - (void) snprintf(*name, MAXNAMELEN-1, "%s%d", _dev_prefix, next); - - devs_by_number[next] = B_TRUE; - return (0); -} - -/* - * FUNCTION: get_next_submirror_name(char *mname, char **subname) - * - * INPUT: mname - pointer to a char * mirror name - * OUTPUT: subname - pointer to a char * to hold submirror name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Determine the next available submirror name according - * to the convention that each submirror name is a sequential - * increment of its mirror's name. - * - * If such a name is unavailable, return the next sequentially - * available volume name. - */ -int -get_next_submirror_name( - char *mname, - char **subname) -{ - char buf[MAXNAMELEN]; - int error = 0; - int next = 0; - int i = 0; - - *subname = NULL; - - /* try next sequential name: mirror + 1... */ - if (extract_index(mname, _dev_prefix, "%d", &next)) { - for (i = next + 1; i < _max_devs_cfg; i++) { - if ((i % 10) == 0) { - /* save for mirrors */ - continue; - } - if (devs_by_number[i] == B_FALSE) { - (void) snprintf(buf, MAXNAMELEN-1, "%s%d", _dev_prefix, i); - if ((*subname = strdup(buf)) != NULL) { - devs_by_number[i] = B_TRUE; - } else { - error = ENOMEM; - } - break; - } - } - } - - if ((error == 0) && (*subname == NULL)) { - /* name adhering to convention isn't available, */ - /* use next sequentially available name */ - error = get_next_volume_name(subname, TYPE_STRIPE); - } - - return (error); -} - -/* - * FUNCTION: get_next_hsp_name(char **name) - * - * OUTPUT: name - pointer to a char * to hold name - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which generates a new hotsparepool name - * using the standard name prefix and the lowest - * available hsp number. - */ -int -get_next_hsp_name( - char **name) -{ - int next = 0; - - for (next = 0; next < _max_hsps; ++next) { - if (hsps_by_number[next] != B_TRUE) { - break; - } - } - - if (next == _max_hsps) { - volume_set_error(gettext("ran out of HSP names")); - return (-1); - } - - *name = (char *)calloc(MAXNAMELEN, sizeof (char)); - if (*name == NULL) { - oprintf(OUTPUT_TERSE, - gettext("failed to allocate volume name string, " - "out of memory")); - return (ENOMEM); - } - - (void) snprintf(*name, MAXNAMELEN-1, "%s%03d", _hsp_prefix, next); - - hsps_by_number[next] = B_TRUE; - - return (0); -} - -static char * -type_name( - svm_type_t type) -{ - switch (type) { - case SVM_DISKSET: return (gettext("disk set")); - case SVM_MDB: return (gettext("metadb")); - case SVM_STRIPE: return (gettext("stripe")); - case SVM_MIRROR: return (gettext("mirror")); - case SVM_RAID: return (gettext("raid")); - case SVM_TRANS: return (gettext("trans")); - case SVM_SP: return (gettext("soft partition")); - case SVM_HSP: return (gettext("hot spare pool")); - case SVM_HS: return (gettext("hot spare")); - case SVM_DRIVE: return (gettext("drive")); - default: return (gettext("unknown")); - } -} - -static svm_snap_t * -svm_snapshot(int *errp) -{ - svm_snap_t *svm_listp = NULL; - - *errp = 0; - - /* initialize the cluster library entry points */ - if (sdssc_bind_library() == SDSSC_ERROR) { - - volume_set_error(gettext("sdssc_bin_library() failed\n")); - *errp = -1; - - } else { - - /* load the SVM cache */ - *errp = load_svm(&svm_listp); - - if (*errp != 0) { - free_svm_snapshot(svm_listp); - svm_listp = NULL; - } - - } - - return (svm_listp); -} - -static void -free_svm_snapshot(svm_snap_t *listp) { - - svm_snap_t *nextp; - - while (listp != NULL) { - nextp = listp->next; - free((void *)listp->diskset); - free((void *)listp->name); - free((void *)listp->slice); - free((void *)listp); - listp = nextp; - } -} - -static int -add_record( - svm_snap_t **listp, - char *setname, - svm_type_t type, - char *mname, - char *slice_name) -{ - svm_snap_t *sp; - - sp = (svm_snap_t *)malloc(sizeof (svm_snap_t)); - if (sp == NULL) { - return (ENOMEM); - } - - if ((sp->diskset = strdup(setname)) == NULL) { - free(sp); - return (ENOMEM); - } - - if ((sp->name = strdup(mname)) == NULL) { - free(sp->diskset); - free(sp); - return (ENOMEM); - } - - sp->type = type; - - if ((sp->slice = strdup(slice_name)) == NULL) { - free(sp->diskset); - free(sp->name); - free(sp); - return (ENOMEM); - } - - sp->next = *listp; - *listp = sp; - - return (0); -} - -static int -diskset_info( - svm_snap_t **listp, - mdsetname_t *sp) -{ - md_error_t error = mdnullerror; - md_replicalist_t *replica_list = NULL; - md_replicalist_t *mdbp; - mdnamelist_t *nlp; - mdnamelist_t *trans_list = NULL; - mdnamelist_t *mirror_list = NULL; - mdnamelist_t *raid_list = NULL; - mdnamelist_t *stripe_list = NULL; - mdnamelist_t *sp_list = NULL; - mdhspnamelist_t *hsp_list = NULL; - - if (metareplicalist(sp, MD_BASICNAME_OK, &replica_list, &error) < 0) { - /* there are no metadb's; that is ok, no need to check the rest */ - mdclrerror(&error); - return (0); - } - mdclrerror(&error); - - for (mdbp = replica_list; mdbp != NULL; mdbp = mdbp->rl_next) { - char size[MAXPATHLEN]; - - (void) snprintf(size, sizeof (size), "%d", - (int)mdbp->rl_repp->r_nblk); - - if (new_entry(listp, mdbp->rl_repp->r_namep->cname, SVM_MDB, size, - sp)) { - metafreereplicalist(replica_list); - return (ENOMEM); - } - } - metafreereplicalist(replica_list); - - if (meta_get_trans_names(sp, &trans_list, 0, &error) >= 0) { - for (nlp = trans_list; nlp != NULL; nlp = nlp->next) { - if (new_entry(listp, nlp->namep->cname, SVM_TRANS, - nlp->namep->cname, sp)) { - free_names(trans_list); - return (ENOMEM); - } - } - - free_names(trans_list); - } - mdclrerror(&error); - - if (meta_get_mirror_names(sp, &mirror_list, 0, &error) >= 0) { - for (nlp = mirror_list; nlp != NULL; nlp = nlp->next) { - if (add_record(listp, sp->setname, SVM_MIRROR, - nlp->namep->cname, "")) { - free_names(mirror_list); - return (ENOMEM); - } - } - - free_names(mirror_list); - } - mdclrerror(&error); - - if (meta_get_raid_names(sp, &raid_list, 0, &error) >= 0) { - for (nlp = raid_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_raid_t *raid; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, &error); - mdclrerror(&error); - if (mdn == NULL) { - continue; - } - - raid = meta_get_raid(sp, mdn, &error); - mdclrerror(&error); - - if (raid != NULL) { - int i; - - for (i = 0; i < raid->cols.cols_len; i++) { - if (new_entry(listp, - raid->cols.cols_val[i].colnamep->cname, SVM_RAID, - nlp->namep->cname, sp)) { - free_names(raid_list); - return (ENOMEM); - } - } - } - } - - free_names(raid_list); - } - mdclrerror(&error); - - if (meta_get_stripe_names(sp, &stripe_list, 0, &error) >= 0) { - for (nlp = stripe_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_stripe_t *stripe; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, &error); - mdclrerror(&error); - if (mdn == NULL) { - continue; - } - - stripe = meta_get_stripe(sp, mdn, &error); - mdclrerror(&error); - - if (stripe != NULL) { - int i; - - for (i = 0; i < stripe->rows.rows_len; i++) { - md_row_t *rowp; - int j; - - rowp = &stripe->rows.rows_val[i]; - - for (j = 0; j < rowp->comps.comps_len; j++) { - md_comp_t *component; - - component = &rowp->comps.comps_val[j]; - if (new_entry(listp, component->compnamep->cname, - SVM_STRIPE, nlp->namep->cname, sp)) { - free_names(stripe_list); - return (ENOMEM); - } - } - } - } - } - - free_names(stripe_list); - } - mdclrerror(&error); - - if (meta_get_sp_names(sp, &sp_list, 0, &error) >= 0) { - for (nlp = sp_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_sp_t *soft_part; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, &error); - mdclrerror(&error); - if (mdn == NULL) { - continue; - } - - soft_part = meta_get_sp(sp, mdn, &error); - mdclrerror(&error); - - if (soft_part != NULL) { - if (new_entry(listp, soft_part->compnamep->cname, SVM_SP, - nlp->namep->cname, sp)) { - free_names(sp_list); - return (ENOMEM); - } - } - } - - free_names(sp_list); - } - mdclrerror(&error); - - if (meta_get_hsp_names(sp, &hsp_list, 0, &error) >= 0) { - mdhspnamelist_t *nlp; - - for (nlp = hsp_list; nlp != NULL; nlp = nlp->next) { - md_hsp_t *hsp; - - hsp = meta_get_hsp(sp, nlp->hspnamep, &error); - mdclrerror(&error); - if (hsp != NULL) { - int i; - - for (i = 0; i < hsp->hotspares.hotspares_len; i++) { - md_hs_t *hs; - - hs = &hsp->hotspares.hotspares_val[i]; - - if (add_record(listp, sp->setname, SVM_HS, - nlp->hspnamep->hspname, hs->hsnamep->bname)) { - metafreehspnamelist(hsp_list); - return (ENOMEM); - } - } - } - - if (add_record(listp, sp->setname, SVM_HSP, - nlp->hspnamep->hspname, "")) { - metafreehspnamelist(hsp_list); - return (ENOMEM); - } - } - - metafreehspnamelist(hsp_list); - } - - mdclrerror(&error); - - return (0); -} - -static void -free_names( - mdnamelist_t *nlp) -{ - mdnamelist_t *p; - - for (p = nlp; p != NULL; p = p->next) { - meta_invalidate_name(p->namep); - } - metafreenamelist(nlp); -} - -/* - * Create a list of SVM devices - */ -static int -load_svm( - svm_snap_t **listp) -{ - int max_sets; - md_error_t error = mdnullerror; - int i; - - if ((max_sets = get_max_sets(&error)) == 0) { - return (0); - } - - if (!mdisok(&error)) { - volume_set_error( - gettext("failed to get maximum number of disk sets.\n")); - mdclrerror(&error); - return (-1); - } - - /* for each possible set number, see if we really have a disk set */ - for (i = 0; i < max_sets; i++) { - mdsetname_t *sp; - - if ((sp = metasetnosetname(i, &error)) == NULL) { - if (!mdisok(&error) && error.info.errclass == MDEC_RPC) { - /* rpc error - no metasets */ - break; - } - - mdclrerror(&error); - continue; - } - - mdclrerror(&error); - - if (add_record(listp, sp->setname, SVM_DISKSET, sp->setname, "")) { - metaflushsetname(sp); - return (ENOMEM); - } - - /* check for drives in disk sets */ - if (sp->setno != 0) { - md_drive_desc *dd; - - dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, - &error); - mdclrerror(&error); - for (; dd != NULL; dd = dd->dd_next) { - if (add_record(listp, sp->setname, SVM_DRIVE, - dd->dd_dnp->rname, "")) { - metaflushsetname(sp); - return (ENOMEM); - } - } - } - - if (diskset_info(listp, sp)) { - metaflushsetname(sp); - return (ENOMEM); - } - - metaflushsetname(sp); - } - - mdclrerror(&error); - - return (0); -} - -/* determine if 'sp' is built on a slice */ -static int -new_entry( - svm_snap_t **listp, - char *slice_name, - svm_type_t type, - char *mname, - mdsetname_t *sp) -{ - mdname_t *mdn; - md_error_t error = mdnullerror; - meta_device_type_t uname_type = UNKNOWN; - - /* Determine the appropriate uname type for metaname */ - if (type == SVM_MDB || type == SVM_DRIVE || type == SVM_TRANS) - uname_type = LOGICAL_DEVICE; - - mdn = metaname(&sp, slice_name, uname_type, &error); - if (!mdisok(&error)) { - mdn = NULL; - } - mdclrerror(&error); - - if (mdn != NULL && ( - mdn->drivenamep->type == MDT_ACCES || - mdn->drivenamep->type == MDT_COMP || - mdn->drivenamep->type == MDT_FAST_COMP)) { - - return (add_record(listp, sp->setname, type, mname, mdn->bname)); - } else { - return (add_record(listp, sp->setname, type, mname, "")); - } -} - -/* - * FUNCTION: get_default_stripe_interlace() - * - * RETURNS: uint64_t - default stripe interlace value - * - * PURPOSE: Helper which retrieves the default stripe interlace - * from libmeta. - */ -uint64_t -get_default_stripe_interlace() -{ - /* convert back to bytes */ - return ((uint64_t)meta_default_stripe_interlace() * DEV_BSIZE); -} - -/* - * FUNCTION: get_max_number_of_devices(int *max) - * - * OUTPUT: max - pointer to int to hold the configured maximum number - * of SVM devices - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which determines the maximum number of allowed - * SVM devices configured for the system. - * - * Wrapper around libmeta function meta_get_max_nunits(). - */ -int -get_max_number_of_devices( - int *max) -{ - md_error_t mderror = mdnullerror; - - *max = meta_get_nunits(&mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - return (-1); - } - - return (0); -} - -/* - * FUNCTION: get_max_number_of_disksets(int *max) - * - * OUTPUT: max - pointer to in to hold the configured maximum number - * of disk sets - * - * RETURNS: int - 0 on success - * !0 otherwise - * - * PURPOSE: Helper which determines the maximum number of allowed - * disk sets which has been configured for the system. - * - * Wrapper around libmeta function get_max_sets(). - */ -int -get_max_number_of_disksets( - int *max) -{ - md_error_t mderror = mdnullerror; - - *max = get_max_sets(&mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - return (-1); - } - - return (0); -} - -/* - * FUNCTION: is_reserved_replica_slice_index(char *diskset, char *dname, - * uint32_t index, boolean_t *bool) - * - * INPUT: diskset - char * disk set name - * dname - char * disk name - * index - integer index of interest - * - * OUTPUT: bool - pointer to a boolean_t to hold the result - * - * RETURNS: int - 0 - success - * !0 - failure - * - * PURPOSE: Helper which determines if the input slice index on - * the named disk in the named disk set is the replica - * slice that is reserved on disks in disk sets. - * - * The named disk is assumed to be in the named disk set. - * - * Determines if metassist is being run in a simulated - * hardware enironment, if not the libmeta function to - * determine the replica slice index is called. - * - * If simulation is active, then a local implementation - * is used to determine the replica slice index. - */ -int -is_reserved_replica_slice_index( - char *diskset, - char *dname, - uint32_t index, - boolean_t *bool) -{ - int error = 0; - boolean_t sim = B_FALSE; - static char *simfile = "METASSISTSIMFILE"; - - sim = ((getenv(simfile) != NULL) && (strlen(getenv(simfile)) > 0)); - - if (sim != B_TRUE) { - - /* sim disabled: use meta_replicaslice() */ - - md_error_t mderror = mdnullerror; - mdsetname_t *sp; - mddrivename_t *dnp; - uint_t replicaslice; - - /* slice assumed to be on disk in the named disk set */ - sp = metasetname(diskset, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - return (-1); - } - - dnp = metadrivename(&sp, dname, &mderror); - if (!mdisok(&mderror)) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - return (-1); - } - - if (meta_replicaslice(dnp, &replicaslice, &mderror) != 0) { - volume_set_error(mde_sperror(&mderror, NULL)); - mdclrerror(&mderror); - return (-1); - } - - *bool = (replicaslice == (uint_t)index); - - } else { - - dm_descriptor_t disk; - boolean_t efi = B_FALSE; - - /* sim enabled: use same logic as meta_replicaslice() */ - ((error = disk_get_by_name(dname, &disk)) != 0) || - (error = disk_get_is_efi(disk, &efi)); - if (error == 0) { - - if (efi == B_FALSE) { - *bool = (index == MD_SLICE7); - } else { - *bool = (index == MD_SLICE6); - } - } - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_svm_util.h b/usr/src/cmd/lvm/metassist/layout/layout_svm_util.h deleted file mode 100644 index d97914414693..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_svm_util.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _VOLUME_SVM_UTIL_H -#define _VOLUME_SVM_UTIL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" - -/* - * scan existing SVM config for the named diskset - * and build lists of device, HSP and diskset names. - */ -extern int scan_svm_names(char *diskset); -extern void release_svm_names(); - -extern int hsp_get_default_for_diskset(char *diskset, - devconfig_t **hsp); -extern int hsp_get_by_name(char *diskset, char *hspname, - devconfig_t **hsp); - -extern int get_next_volume_name(char **name, - component_type_t type); -extern int get_next_hsp_name(char **name); -extern int get_next_submirror_name(char *mname, char **subname); - -extern int reserve_volume_name(char *name); -extern int reserve_hsp_name(char *name); - -extern void release_volume_name(char *name); -extern void release_hsp_name(char *name); - -extern boolean_t is_volume_name_valid(char *name); -extern boolean_t is_hsp_name_valid(char *name); - -extern boolean_t is_volume_name_in_range(char *name); - -extern int get_disks_in_diskset(char *dsname, dlist_t **disks); - -extern int is_disk_in_diskset( - dm_descriptor_t disk, char *diskset, boolean_t *bool); -extern int is_disk_in_other_diskset( - dm_descriptor_t disk, char *diskset, boolean_t *bool); - -extern boolean_t diskset_exists(char *name); -extern uint64_t get_default_stripe_interlace(); - -extern int get_n_metadb_replicas(int *nreplicas); -extern int get_max_number_of_devices(int *max); -extern int get_max_number_of_disksets(int *max); - -extern int is_reserved_replica_slice_index( - char *diskset, char *dname, uint32_t index, boolean_t *bool); - -#ifdef __cplusplus -} -#endif - -#endif /* _VOLUME_SVM_UTIL_H */ diff --git a/usr/src/cmd/lvm/metassist/layout/layout_validate.c b/usr/src/cmd/lvm/metassist/layout/layout_validate.c deleted file mode 100644 index 02cae6577b1d..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_validate.c +++ /dev/null @@ -1,1994 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#include - -#include "metassist.h" -#include "volume_dlist.h" -#include "volume_error.h" -#include "volume_string.h" -#include "volume_output.h" - -#define _LAYOUT_VALIDATE_C - -#include "layout_discovery.h" -#include "layout_dlist_util.h" -#include "layout_device_cache.h" -#include "layout_device_util.h" -#include "layout_request.h" -#include "layout_slice.h" -#include "layout_svm_util.h" -#include "layout_validate.h" - -/* - * This module contains the majority of the validation code which - * layout applies to input requests. The assumption/agreement with - * the controller implementation is that requests passed into layout - * have undergone syntactic validation and that layout is responsible - * for semantic validation. - * - * The semantic validation that is handled: - * - * 1. For a toplevel diskset request, validate: - * - * - the number of disksets is not exceeded - * - the number of devices is not exceeded - * - * (These items are not directly validated within this module, - * but it is useful to document that they are handled somewhere). - * - * 2. For any devconfig_t representing a volume request, verify that: - * - * - all HSP names are semantically valid. The name should conform - * to the HSP naming convention: hspXXX. - * - * - all concat, stripe, mirror, and volume names refer to - * unused, semantically valid metadevice names. Examples of - * bad data: - * - * - a valid volume name that is already in use (d0, d10) - * - * - a valid volume name that is used two or more times to - * refer to new elements in the request. - * - * - a valid volume name that is out of range (d99877, - * d44356) or exceeds the maximum number of possible - * volumes given the current SVM configuration. - * - * - all available and unavailable device specifications refer - * to existing controllers, disks, or slices on the system. - * Examples of bad data: - * - * - a valid but non-existent controller (c23, c2) - * - a valid but non-existent disk (c0t0d8, c1t0d0) - * - a valid slice on a non-existent disk or controller - * (c0t0d8s7, c1t0d05) - * - a valid slice on an existing disk (c0t0d0s12, - * c0t0d0s9) - * - * - any typed volume request that explicitly specifies components - * requires additional validation to detect syntactically valid - * expressions that are semantically ambiguous: - * - * a concat request that: - * - specifies size and components is invalid - * - * a stripe request that: - * - specifies size and components is invalid - * - specifies mincomp and components but not enough - * components is invalid - * - specifies maxcomp and components but too many - * components is invalid - * - * a HSP request that: - * - specifies components that are not appropriate for - * the volumes the HSP serves is invalid (?) - * - * a stripe, concat or HSP request that: - * - specifies a component that was used in a prior - * request is invalid - * - specifies a component that does not exist in the - * diskset is invalid (e.g., c0t0d0s0, but c0t0d0 is - * not yet in the diskset) - * - * a mirror request that: - * - specifies nsubs and components but not enough - * components is invalid - * - specifies components and the components specify - * different sizes results in a WARNING since the total - * usable capacity of the mirror is determined by the - * smallest of its submirrors. - * - specifies components and the components specify - * components results in a WARNING since the submirrors - * may end up with different sizes - */ -static int validate_request_name( - devconfig_t *req, - component_type_t type); - -static int validate_request_size( - devconfig_t *req, - component_type_t type); - -static int validate_minimum_size( - uint64_t nbytes); - -static uint64_t apply_layout_overhead_factor( - uint64_t req_size); - -static int get_space_available_for_request( - devconfig_t *request, - dlist_t *usable_slices, - uint64_t *avail_space); - -static int do_available_space_check( - uint64_t req_size, - uint64_t raw_avail_space, - devconfig_t *request, - dlist_t *usable_slices); - -static int validate_request_redundancy_level( - devconfig_t *req); - -static int validate_request_npaths( - devconfig_t *req); - -static int validate_request_submirrors( - devconfig_t *req); - -static int validate_submirror_types( - dlist_t *submirrors); - -static int validate_submirror_number( - devconfig_t *req, - dlist_t *submirrors); - -static int validate_submirror_sizes( - devconfig_t *req, - dlist_t *submirrors); - -static int validate_submirror_size_and_components( - devconfig_t *submir, - uint64_t mirror_size, - uint64_t *assumed_size, - dlist_t **submirs_with_size, - dlist_t **submirs_with_comps, - dlist_t **submirs_no_size_or_comps); - -static int validate_slice_components( - devconfig_t *req, - component_type_t type); - -static char *get_device_aliases_string( - dm_descriptor_t desc); - -static int validate_device_array( - char **array, - char *which, - dlist_t **list); - -static int add_reserved_name(char *name); -static boolean_t is_rsvd_name(char *name); -static dlist_t *_rsvd_names = NULL; - -/* - * FUNCTION: release_validatation_caches() - * - * RETURNS: int - 0 - * - * PURPOSE: Cleanup function. - * - * Purges list of reserved volume names. Should be called - * after all layout requests have been processed. - */ -int -release_validation_caches() -{ - dlist_free_items(_rsvd_names, NULL); - _rsvd_names = NULL; - - return (0); -} - -/* - * FUNCTION: validate_basic_svm_config() - * - * RETURNS: int - 0 on success - * !0 on failure - * - * PURPOSE: Check to see if the local set metadb replicas have been created. - * - * Makes sure at least 1 metadb replica exists for the local set. - */ -int -validate_basic_svm_config() -{ - int error = 0; - int nreplicas = 0; - - if ((error = get_n_metadb_replicas(&nreplicas)) == 0) { - if (nreplicas == 0) { - volume_set_error( - gettext("Failed: State database replicas must " - "exist before using %s.\n" - "See metadb(1M) and %s(1M)."), - progname, progname); - error = -1; - } else { - oprintf(OUTPUT_DEBUG, - gettext("%d metadb replicas found.\n"), - nreplicas); - } - } - - return (error); -} - -/* - * FUNCTION: validate_request_sizes(devconfig_t *req) - * - * INPUT: req: a devconfig_t pointer to the toplevel request - * - * RETURNS: int - 0 on success - * !0 on failure - * - * PURPOSE: Check to see if the any of the individual volume request - * sizes exceeds the raw available space on the system or - * the space available to that specific request. - * - * Check to see if the total space for all requests exceeds - * the raw available space. - * - * If any check fails, stop checking, emit an error and - * return -1. - * - * Note: this function must be called after the slice - * usages have been determined and the list of usable - * slices has been generated. - */ -int -validate_request_sizes( - devconfig_t *request) -{ - int error = 0; - dlist_t *usable_slices; - dlist_t *iter; - char bad_rqst_info[BUFSIZ]; - uint64_t bad_rqst_space = 0; - uint64_t total_rqst_space = 0; - uint64_t raw_space = 0; - - (void) get_usable_slices(&usable_slices); - - /* - * calculate raw available space: space on slices that are - * "available" based on the diskset defaults or global defaults - */ - if ((error = get_space_available_for_request(request, - usable_slices, &raw_space)) != 0) { - return (error); - } - - if (raw_space == 0) { - volume_set_error( - gettext("Failed: there is no available space.\n")); - return (-1); - } - - /* deduct sizes of reserved components */ - (void) get_reserved_slices(&iter); - for (; (iter != NULL) && (raw_space != 0) && (error == 0); - iter = iter->next) { - dm_descriptor_t slice = (uintptr_t)iter->obj; - uint64_t nbytes; - if ((error = slice_get_size(slice, &nbytes)) == 0) { - if (raw_space >= nbytes) { - raw_space -= nbytes; - } else { - raw_space = 0; - } - } - } - - /* - * check each volume request's size against raw_space, - * if that looks ok, do a closer check with the request's - * available devices - */ - iter = devconfig_get_components(request); - for (; (iter != NULL) && (error == 0); iter = iter->next) { - - devconfig_t *req = (devconfig_t *)iter->obj; - component_type_t type = TYPE_UNKNOWN; - char *typestr = NULL; - uint64_t nbytes = 0; - - (void) devconfig_get_type(req, &type); - if (type == TYPE_HSP) { - continue; - } - - typestr = devconfig_type_to_str(type); - - if ((error = devconfig_get_size(req, &nbytes)) == 0) { - - /* check specified size */ - - if (type == TYPE_CONCAT || type == TYPE_STRIPE) { - if ((error = do_available_space_check( - apply_layout_overhead_factor(nbytes), - raw_space, req, usable_slices)) == 0) { - total_rqst_space += nbytes; - } else if (error == ENOSPC || error == E2BIG) { - (void) snprintf(bad_rqst_info, BUFSIZ-1, - "%s", typestr); - bad_rqst_space = nbytes; - } - } else if (type == TYPE_MIRROR) { - uint16_t nsubs = 0; - if ((error = get_mirror_nsubs(req, &nsubs)) == 0) { - if ((error = do_available_space_check( - apply_layout_overhead_factor(nbytes * nsubs), - raw_space, req, usable_slices)) == 0) { - total_rqst_space += (nsubs * nbytes); - } else { - (void) snprintf(bad_rqst_info, BUFSIZ-1, - gettext("%s with %d submirrors"), - typestr, nsubs); - bad_rqst_space = nbytes; - } - } - } - - } else if ((error == ERR_ATTR_UNSET) && (type == TYPE_MIRROR)) { - - /* mirror specified no size: find submirror that does */ - - dlist_t *subs = devconfig_get_components(req); - - error = 0; - if (subs != NULL) { - dlist_t *iter2; - int nsubs = dlist_length(subs); - for (iter2 = subs; - (iter2 != NULL) && (error == 0); - iter2 = iter2->next) { - devconfig_t *sub = (devconfig_t *)iter2->obj; - if ((error = devconfig_get_size(sub, &nbytes)) == 0) { - if ((error = do_available_space_check( - apply_layout_overhead_factor(nbytes * nsubs), - raw_space, req, usable_slices)) == 0) { - total_rqst_space += (nbytes * nsubs); - } else { - (void) snprintf(bad_rqst_info, BUFSIZ-1, - gettext("%s with %d submirrors"), - typestr, nsubs); - bad_rqst_space = nbytes; - } - break; - } else if (error == ERR_ATTR_UNSET) { - error = 0; - } - } - } - } - } - - /* - * do_available_space_check may return ENOSPC or E2BIG - */ - if (error == ENOSPC) { - char *sizestr = NULL; - (void) bytes_to_sizestr(bad_rqst_space, - &sizestr, universal_units, B_FALSE); - - volume_set_error( - gettext("Failed: the request for a %s %s " - "exceeds the available space.\n"), - sizestr, bad_rqst_info); - - free(sizestr); - error = -1; - - } else if (error == E2BIG) { - char *sizestr = NULL; - (void) bytes_to_sizestr(bad_rqst_space, - &sizestr, universal_units, B_FALSE); - - volume_set_error( - gettext("Failed: the request for a %s %s " - "exceeds the usable space on the device(s) " - "specified as available.\n"), - sizestr, bad_rqst_info); - - free(sizestr); - error = -1; - - } else if (apply_layout_overhead_factor(total_rqst_space) > raw_space) { - char *sizestr = NULL; - (void) bytes_to_sizestr( - total_rqst_space, &sizestr, universal_units, B_FALSE); - - volume_set_error( - gettext("Failed: the total space requested for the " - "volumes (about %s) exceeds the available " - "space.\n"), - sizestr); - - free(sizestr); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: apply_layout_overhead_factor(uint64_t req_size) - * - * INPUT: req_size: a requested volume size - * - * RETURNS: the requested volume size with an overhead factor applied - * - * PURPOSE: The input size size is inflated by a "fudge" factor - * to account for some of the expected overhead required for - * volumes such as block and cylinder boundary alignment. - */ -static uint64_t -apply_layout_overhead_factor( - uint64_t req_size) -{ - double overhead = 1.15; - double d_size = req_size; - uint64_t result = (uint64_t)(d_size * overhead); - - return (result); -} - -/* - * FUNCTION: get_space_available_for_request(devconfig_t *request, - * dlist_t *usable_slices, uint64_t *avail_space) - * - * INPUT: request: a devconfig_t volume request - * usable_slices: a list of usable slice dm_descriptor_t handles - * - * OUTPUT: avail_space: the total space on slices in the usable_slice - * list that is available for use by the input - * request. - * - * RETURNS: int - 0 on success - * !0 on failure - * - * PURPOSE: Iterate the input list of usable slices, determine which are - * available to the input request and accumulate the total space - * they represent. - * - * The slices in the usable_slice list are those with no apparent - * usage detected. The slice_is_available() check determines - * whether the slice passes the available/unavailable device - * specification associated with the input request. - */ -static int -get_space_available_for_request( - devconfig_t *request, - dlist_t *usable_slices, - uint64_t *avail_space) -{ - dlist_t *iter; - int error = 0; - - *avail_space = 0; - - for (iter = usable_slices; - (iter != NULL) && (error == 0); - iter = iter->next) { - dm_descriptor_t slice = (uintptr_t)iter->obj; - char *sname; - uint64_t nbytes; - boolean_t avail = B_FALSE; - if ((error = get_display_name(slice, &sname)) == 0) { - if ((error = slice_is_available(sname, request, &avail)) == 0) { - if (avail == B_TRUE) { - if ((error = slice_get_size(slice, &nbytes)) == 0) { - *avail_space += nbytes; - } - } - } - } - } - - return (error); -} - -/* - * FUNCTION: do_available_space_check(uint64_t req_size, - * uint64_t raw_avail_space, devconfig_t *request, - * dlist_t *usable_slices) - * - * INPUT: req_size: the requested size of a volume - * raw_avail_space:the total available space for all volumes - * request: a devconfig_t volume request - * usable_slices: a list of usable slice dm_descriptor_t handles - * - * RETURNS: int - ENOSPC if the requested size exceeds the raw - * available space. - * - * E2BIG if the requested size exceeds the space - * available specifically to the input request, - * taking into account its available and - * unavailable device specifications. - * - * 0 otherwise - * - * PURPOSE: Check the input request size against different forms of - * available space. - * - * If the requested size is less than the raw_avail_space, do the - * more expensive check against the space specifically available - * to the input request. - */ -static int -do_available_space_check( - uint64_t req_size, - uint64_t raw_avail_space, - devconfig_t *request, - dlist_t *usable_slices) -{ - int error = 0; - - if (req_size > raw_avail_space) { - error = ENOSPC; - } else { - uint64_t avail_space = 0; - if ((error = get_space_available_for_request(request, - usable_slices, &avail_space)) == 0) { - if (req_size > avail_space) { - error = E2BIG; - } - } - } - - return (error); -} - -/* - * FUNCTION: validate_request(devconfig_t *req) - * - * INPUT: req - a devconfig_t representing a volume layout request. - * - * RETURNS: int - 0 if the request passes validation - * !0 otherwise. - * - * PURPOSE: Main entry point into the layout request semantic - * validatation. - * - * Determines the type of volume requested and invokes the - * appropriate validation functions. - */ -int -validate_request( - devconfig_t *req) -{ - int error = 0; - component_type_t type = TYPE_UNKNOWN; - - ((error = validate_request_avail_unavail(req)) != 0) || - (error = devconfig_get_type(req, &type)); - if (error != 0) { - return (error); - } - - if (type == TYPE_MIRROR) { - - ((error = validate_request_name(req, type)) != 0) || - (error = validate_request_size(req, type)) || - (error = validate_request_submirrors(req)); - - } else if (type == TYPE_CONCAT || type == TYPE_STRIPE) { - - ((error = validate_request_name(req, type)) != 0) || - (error = validate_request_size(req, type)) || - (error = validate_slice_components(req, type)); - - } else if (type == TYPE_HSP) { - - ((error = validate_request_name(req, type)) != 0) || - (error = validate_slice_components(req, type)); - - } else if (type == TYPE_VOLUME) { - - ((error = validate_request_name(req, type)) != 0) || - (error = validate_request_redundancy_level(req)) || - (error = validate_request_npaths(req)); - - } - - return (error); -} - -/* - * FUNCTION: validate_reserved_slices() - * - * RETURNS: int - 0 if all reserved slices are usable in - * new devices. - * !0 otherwise. - * - * PURPOSE: Ensures that each reserved slice is actually usable - * as a volume component. - * - * Retrieves list of reserved slices and list of usable - * slices. Ensures that each reserved slice is in the - * usable list, generates an error if it is not. - * - * This is broken out as a separate function because - * initial validation is using the lists of all known - * devices. Device "usability" is only determined after - * the initial validation has completed successfully. - */ -int -validate_reserved_slices() -{ - dlist_t *reserved_slices; - dlist_t *usable_slices; - int error = 0; - - ((error = get_reserved_slices(&reserved_slices)) != 0) || - (error = get_usable_slices(&usable_slices)); - if (error == 0) { - - dlist_t *iter; - for (iter = reserved_slices; - (iter != NULL) && (error == 0); - iter = iter->next) { - - if (dlist_contains(usable_slices, iter->obj, - compare_descriptor_names) != B_TRUE) { - - dm_descriptor_t slice = (uintptr_t)iter->obj; - char *name = NULL; - - error = get_display_name(slice, &name); - if (error == 0) { - char *aliases = get_device_aliases_string(slice); - if (aliases[0] != NULL) { - volume_set_error( - gettext("A requested volume component " - "is currently in use: \"%s\" " - "(aliases: %s).\n"), - name, aliases); - } else { - volume_set_error( - gettext("A requested volume component " - "is currently in use: \"%s\"\n"), - name); - } - error = -1; - } - } - } - } - - return (error); -} - -/* - * FUNCTION: validate_request_avail_unavail(devconfig_t *req) - * - * INPUT: req - a devconfig_t representing a volume layout request. - * - * RETURNS: int - 0 if the request passes validation - * !0 otherwise. - * - * PURPOSE: validation function for a request's lists of available - * and unavailable devices. - * - * validates that both lists contain names of known devices. - * - * validates that the same name does not appear in both lists. - */ -int -validate_request_avail_unavail( - devconfig_t *req) -{ - dlist_t *avail = NULL; - dlist_t *unavail = NULL; - int error = 0; - - /* check that each array contains valid devices */ - ((error = validate_device_array(devconfig_get_available(req), - gettext("available"), &avail)) != 0) || - (error = validate_device_array(devconfig_get_unavailable(req), - gettext("unavailable"), &unavail)); - - /* check that the arrays don't both contain the same device(s) */ - if (error == 0) { - dlist_t *iter; - for (iter = avail; iter != NULL; iter = iter->next) { - if (dlist_contains(unavail, iter->obj, - compare_descriptor_names) == B_TRUE) { - char *name; - char *aliases = - get_device_aliases_string((uintptr_t)iter->obj); - - (void) get_display_name((uintptr_t)iter->obj, &name); - if (aliases[0] != NULL) { - volume_set_error( - gettext("\"%s\" specified as both available " - "and unavailable.\n" - "It has these aliases: %s\n"), - name, aliases); - } else { - volume_set_error( - gettext("\"%s\" specified as both available " - "and unavailable.\n"), - name); - } - error = -1; - break; - } - } - } - - dlist_free_items(avail, NULL); - dlist_free_items(unavail, NULL); - - return (error); -} - -/* - * FUNCTION: validate_device_array(char **array, char *which, dlist_t **list) - * - * INPUT: array - an array of char * device names - * which - either "available" or "unavailable" - * indicating the array name to use in - * error strings. - * OUTPUT: list - a list of device descriptors corresponding the each - * of the input names. - * - * RETURNS: int - 0 if the array passes validation - * !0 otherwise. - * - * PURPOSE: validation function for a request's list of available - * or unavailable devices. - * - * DID names are converted to CTD names. - * - * The CTD name must be of an available slice, disk or - * HBA, or a known used slice, disk or HBA that was - * discovered when the system's devices were probed. - * - * Any other name is assumed to refer to a device not - * attached to the system and results in a validation - * failure. - * - * Descriptors for validated devices are added to the input - * list. - */ -int -validate_device_array( - char **array, - char *which, - dlist_t **list) -{ - int error = 0; - int i = 0; - - if (array == NULL || *array == NULL) { - return (0); - } - - for (i = 0; (array[i] != NULL) && (error == 0); i++) { - - dm_descriptor_t slice = (dm_descriptor_t)0; - dm_descriptor_t disk = (dm_descriptor_t)0; - dm_descriptor_t hba = (dm_descriptor_t)0; - char *name = array[i]; - - /* name must correspond to a known HBA, disk, or slice */ - if ((error = hba_get_by_name(name, &hba)) == 0) { - if (hba == (dm_descriptor_t)0) { - if ((error = disk_get_by_name(name, &disk)) == 0) { - if (disk == (dm_descriptor_t)0) { - error = slice_get_by_name(name, &slice); - } - } - } - } - - if (error != 0) { - break; - } - - /* 0 sized slices cannot be used as-is, pretend non-existant */ - if (slice != (dm_descriptor_t)0) { - uint64_t size = 0; - if ((error = slice_get_size(slice, &size)) == 0) { - if (size == 0) { - slice = (dm_descriptor_t)0; - } - } - } - - oprintf(OUTPUT_DEBUG, - gettext(" validate %s (%s): s=%llu, d=%llu, c=%llu\n"), - which, array[i], slice, disk, hba); - - if ((error == 0) && ((slice != 0) || (disk != 0) || (hba != 0))) { - - /* name represents an individual "device", add it to the list */ - dm_descriptor_t desc = (dm_descriptor_t)0; - dlist_t *item; - - if (slice != 0) { - desc = slice; - } else if (disk != 0) { - desc = disk; - } else if (hba != 0) { - desc = hba; - } - - if ((item = dlist_new_item((void *)(uintptr_t)desc)) == NULL) { - error = ENOMEM; - } else { - *list = dlist_append(item, *list, AT_HEAD); - } - - } else if (is_ctd_target_name(name) == B_TRUE) { - - /* expand target to all of its disks */ - dlist_t *disks = NULL; - if ((error = get_disks_for_target(name, &disks)) == 0) { - if ((disks == NULL) || (dlist_length(disks) == 0)) { - volume_set_error( - gettext("nonexistent device specified " - "as %s: \"%s\"."), - which, array[i]); - error = -1; - } else { - dlist_t *iter; - for (iter = disks; - (iter != NULL) && (error == 0); - iter = iter->next) { - - dlist_t *item; - if ((item = dlist_new_item(iter->obj)) == NULL) { - error = ENOMEM; - } else { - *list = dlist_append(item, *list, AT_HEAD); - } - } - } - } - - } else { - - /* not a slice, disk, target or ctrl */ - volume_set_error( - gettext("nonexistent device specified " - "as %s: \"%s\"."), - which, array[i]); - error = -1; - } - } - - return (error); -} - -/* - * FUNCTION: validate_request_name(devconfig_t *req, component_type_t type) - * - * INPUT: req - a devconfig_t volume request - * type - the volume type being requested - * - * SIDEEFFECT: if the request specifies a name and the name is valid and - * not currently in use an attempt is made to reserve it. - * if the name has already been reserved by a prior volume - * request, validation fails. - * - * RETURNS: int - 0 if the requested name passes validation - * (or there is no name request) - * !0 otherwise. - * - * PURPOSE: Validation function for a request's volume name. - * - * a HSP name must be valid and reservable. - * - * a volume name must be valid and reservable. - */ -static int -validate_request_name( - devconfig_t *req, - component_type_t type) -{ - char *name = NULL; - char *typestr = devconfig_type_to_str(type); - int error = 0; - - if ((error = devconfig_get_name(req, &name)) != 0) { - if (error != ERR_ATTR_UNSET) { - volume_set_error( - gettext("error getting requested name.\n")); - return (error); - } - /* no name specified */ - return (0); - } - - if (type == TYPE_HSP) { - if (is_hsp_name_valid(name) == 0) { - volume_set_error( - gettext("requested %s name \"%s\" is not valid.\n"), - typestr, name); - error = -1; - } else if (reserve_hsp_name(name) != 0) { - if (is_rsvd_name(name) == B_TRUE) { - volume_set_error( - gettext("requested %s name \"%s\" used " - "previously in this request.\n"), - typestr, name); - } else { - volume_set_error( - gettext("requested %s name \"%s\" is not " - "available.\n"), - typestr, name); - } - error = -1; - } else { - error = add_reserved_name(name); - } - } else { - if (is_volume_name_valid(name) == 0) { - volume_set_error( - gettext("requested %s name \"%s\" is not valid.\n"), - typestr, name); - error = -1; - } else if (is_volume_name_in_range(name) != B_TRUE) { - int max = 0; - (void) get_max_number_of_devices(&max); - volume_set_error( - gettext("requested %s name \"%s\" is not legal.\n" - "Use a name less than d%d.\n"), - typestr, name, max); - error = -1; - } else if (reserve_volume_name(name) != 0) { - if (is_rsvd_name(name) == B_TRUE) { - volume_set_error( - gettext("requested %s name \"%s\" used " - "previously in this request.\n"), - typestr, name); - } else { - volume_set_error( - gettext("requested %s name \"%s\" is not " - "available, a volume with that name " - "already exists.\n"), - typestr, name); - } - error = -1; - } else { - error = add_reserved_name(name); - } - } - - return (error); -} - -/* - * FUNCTION: add_reserved_name(char *name) - * - * INPUT: name - a char * volume name - * - * RETURNS: int - 0 on success - * !0 otherwise. - * - * PURPOSE: Helper which remembers specfically requested names - * in a private list to ensure that the same name isn't - * requested more than once. - */ -static int -add_reserved_name( - char *name) -{ - dlist_t *item = NULL; - - if ((item = dlist_new_item(name)) == NULL) { - return (ENOMEM); - } - - _rsvd_names = dlist_append(item, _rsvd_names, AT_TAIL); - - return (0); -} - -/* - * FUNCTION: is_rsvd_name(char *name) - * - * INPUT: name - a char * volume name - * - * RETURNS: boolean_t - B_TRUE if the requested name is currently - * reserved, B_FALSE otherwise. - * - * PURPOSE: Helper which checks to see if the input volume - * name was previously reserved. - */ -static boolean_t -is_rsvd_name( - char *name) -{ - dlist_t *iter = NULL; - - for (iter = _rsvd_names; iter != NULL; iter = iter->next) { - if ((string_case_compare(name, (char *)iter->obj)) == 0) { - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* - * FUNCTION: validate_request_size(devconfig_t *req, component_type_t type) - * - * INPUT: req - a devconfig_t volume request - * type - the volume type being requested - * - * RETURNS: int - 0 if the requested size passes validation - * (or there is no size request) - * !0 otherwise. - * - * PURPOSE: Validation function for a request's volume size. - * - * a HSP request can have no size. - * - * a concat, stripe or mirror request may have a size. - * if size is specified, the request cannot also specify - * components. Conversely, if the request does not specify - * a size, it must specify components. - */ -static int -validate_request_size( - devconfig_t *req, - component_type_t type) -{ - uint64_t nbytes = 0; - int error = 0; - - if (type == TYPE_HSP) { - return (0); - } - - if ((error = devconfig_get_size(req, &nbytes)) != 0) { - if (error == ERR_ATTR_UNSET) { - /* nbytes not specified, request must have subcomponents */ - dlist_t *list = devconfig_get_components(req); - if (list != NULL && dlist_length(list) > 0) { - error = 0; - } else { - volume_set_error( - gettext("%s request specifies no size or " - "subcomponents.\n"), - devconfig_type_to_str(type)); - error = -1; - } - } - return (error); - } - - return (error); -} - -/* - * FUNCTION: validate_minimum_size(uint64_t nbytes) - * - * INPUT: nbytes - requested volume size in bytes - * - * RETURNS: int - 0 if the requested size passes validation - * (or there is no size request) - * !0 otherwise. - * - * PURPOSE: Validation function for a request's volume size. - * - * an error is issued if the requested size <= 512K. - */ -static int -validate_minimum_size( - uint64_t nbytes) -{ - static uint64_t min = (512 * 1024) - 1; - int error = 0; - - if (nbytes <= min) { - char *sizestr = NULL; - char *minstr = NULL; - - (void) bytes_to_sizestr( - nbytes, &sizestr, universal_units, B_FALSE); - (void) bytes_to_sizestr( - min, &minstr, universal_units, B_FALSE); - - volume_set_error( - gettext("requested volume size (%s) must be " - "greater than %s.\n"), - sizestr, minstr); - - free(sizestr); - free(minstr); - - error = -1; - } - - return (error); -} - -/* - * FUNCTION: validate_request_redundancy_level(devconfig_t *req) - * - * INPUT: req - a devconfig_t volume request - * - * RETURNS: int - 0 if the requested redundancy level - * passes validation (or none was requested) - * !0 otherwise. - * - * PURPOSE: Validation function for a redundant volume request's - * redundancy level. - * - * If the request specifies redundancy, the value must be - * between 1 and 4. - */ -static int -validate_request_redundancy_level( - devconfig_t *req) -{ - uint16_t rlevel = 0; - int error = 0; - - if ((error = devconfig_get_volume_redundancy_level( - req, &rlevel)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - } - return (error); - } - - if (rlevel > 4) { - volume_set_error(gettext( - "requested redundancy level must be between 0 and 4.\n")); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: validate_request_npaths(devconfig_t *req) - * - * INPUT: req - a devconfig_t volume request - * - * RETURNS: int - 0 if the requested # of redundant data paths - * passes validation (or none was requested) - * !0 otherwise. - * - * PURPOSE: Validation function for a volume request's number of - * redundant data paths. This value controls the number - * of independent data paths slices components selected - * for the volume should have. - * - * If the request specifies npaths, the value must be - * between 1 and 4 (4 is an arbitrary upper limit, there - * is no known physical limit). - */ -static int -validate_request_npaths( - devconfig_t *req) -{ - uint16_t npaths = 0; - uint16_t minpaths = 1; - uint16_t maxpaths = 4; - - int error = 0; - - if ((error = devconfig_get_volume_npaths(req, &npaths)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - } - return (error); - } - - if (npaths < minpaths || npaths > maxpaths) { - volume_set_error( - gettext("requested number of redundant paths must be " - "between %d and %d.\n"), minpaths, maxpaths); - error = -1; - } - - - if ((npaths > 1) && (is_mpxio_enabled() != B_TRUE)) { - volume_set_error( - gettext("requested number of redundant paths (%d) cannot " - "be provided, MPXIO is not enabled on this " - "system."), - npaths); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: validate_request_submirrors(devconfig_t *req) - * - * INPUT: req - a devconfig_t volume request - * - * RETURNS: int - 0 if the requested mirror's submirrors - * pass validation - * !0 otherwise. - * - * PURPOSE: Validation function for a mirror volume request's - * explicitly specified submirror components. - * - * Items to check: - * a. submirror types - * b. submirror number - * c. submirror sizes - */ -static int -validate_request_submirrors( - devconfig_t *req) -{ - dlist_t *submirrors = NULL; - int error = 0; - - submirrors = devconfig_get_components(req); - - ((error = validate_submirror_types(submirrors)) != 0) || - (error = validate_submirror_number(req, submirrors)) || - (error = validate_submirror_sizes(req, submirrors)); - - return (error); -} - -/* - * FUNCTION: validate_submirror_types(dlist_t *subs) - * - * INPUT: subs - a list of submirror requests - * - * RETURNS: int - 0 if the requested submirrors - * pass validation - * !0 otherwise. - * - * PURPOSE: Validation function for a mirror volume request's - * explicitly specified submirror components. - * - * Checks that each requested submirror request - * is for a concat or stripe. - */ -static int -validate_submirror_types( - dlist_t *submirrors) -{ - dlist_t *iter; - int error = 0; - - /* specified submirrors must be stripes or concats */ - for (iter = submirrors; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *submir = (devconfig_t *)iter->obj; - component_type_t submirtype = TYPE_UNKNOWN; - - if ((error = devconfig_get_type(submir, &submirtype)) != 0) { - volume_set_error( - gettext("failed to get requested component type.\n")); - break; - } - - if (submirtype != TYPE_CONCAT && submirtype != TYPE_STRIPE) { - volume_set_error( - gettext("requested submirror type \"%s\" " - "is not valid.\n"), - devconfig_type_to_str(submirtype)); - error = -1; - break; - } - } - - return (error); -} - -/* - * FUNCTION: validate_submirror_number(devconfig_t *req, dlist_t *subs) - * - * INPUT: req - the mirror request - * subs - the list of requested submirrors - * - * RETURNS: int - 0 if the requested submirrors - * pass validation - * !0 otherwise. - * - * PURPOSE: Validation function for a mirror volume request's - * explicitly specified submirror components. - * - * Checks that the number of submirror components - * that have been specified matches the number of - * submirrors specified. - */ -static int -validate_submirror_number( - devconfig_t *req, - dlist_t *submirrors) -{ - uint16_t nsubs = 0; - int error = 0; - - if ((error = devconfig_get_mirror_nsubs(req, &nsubs)) != 0) { - if (error == ERR_ATTR_UNSET) { - /* not specified */ - error = 0; - } - } else if ((submirrors != NULL) && - (dlist_length(submirrors) != nsubs)) { - volume_set_error( - gettext("the requested number of submirrors (%d) differs " - "from the number of specified submirrors (%d).\n"), - nsubs, dlist_length(submirrors)); - error = -1; - } - - return (error); -} - -/* - * FUNCTION: validate_submirror_sizes(devconfig_t *req, - * dlist_t *submirrors) - * - * INPUT: req - the mirror request - * submirrors - the list of requested submirrors - * - * RETURNS: int - 0 if the requested submirrors - * pass validation - * !0 otherwise. - * - * PURPOSE: Validation function for a mirror volume request's - * explicitly specified size. Assumes that the mirror's size - * has been validated by validate_request_size(). - * - * Compares explicitly requested mirror size against specified - * component sizes and checks: - * - * - any submirror request that specifies both size and - * components is invalid - * - any submirror request specifying a size different - * than that explictly requested for the mirror is - * invalid - * - a submirror request specifying a size < 512K is invalid. - * - * Other validation/warnings: - * - * - submirrors that specify components may end up with - * usable capacity that differs from what was specified - * for the mirror. - * - * - submirrors which specify neither size nor components are - * assumed to be the size requested for the mirror. If the - * mirror size is not specified, the first explicit size for - * a submirror is assumed as the size for the mirror. - */ -static int -validate_submirror_sizes( - devconfig_t *req, - dlist_t *submirrors) -{ - dlist_t *submirs_with_size = NULL; - dlist_t *submirs_with_comps = NULL; - dlist_t *submirs_with_nothing = NULL; - - dlist_t *iter = NULL; - uint64_t mirror_size = 0; - uint64_t assumed_size = 0; - int error = 0; - - if (submirrors == NULL || dlist_length(submirrors) == 0) { - return (0); - } - - if ((error = devconfig_get_size(req, &mirror_size)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - } else { - return (error); - } - } - - /* - * check size and component for each submirror, - * collect those that specify size, components or neither - * into separate lists. - */ - for (iter = submirrors; - (iter != NULL) && (error == 0); - iter = iter->next) { - - devconfig_t *submir = (devconfig_t *)iter->obj; - - error = validate_submirror_size_and_components(submir, - mirror_size, &assumed_size, &submirs_with_size, - &submirs_with_comps, &submirs_with_nothing); - - } - - if (error == 0) { - - int n_size = dlist_length(submirs_with_size); - int n_comp = dlist_length(submirs_with_comps); - int n_none = dlist_length(submirs_with_nothing); - - if ((n_size != 0) && (n_comp != 0)) { - /* some submirrors specified size, some components */ - oprintf(OUTPUT_TERSE, - gettext(" *** warning: %d submirrors are specified " - "by size, %d specified by components.\n" - " The resulting mirror capacity will be " - "that of the smallest submirror.\n"), - n_size, n_comp); - } - - if (n_none != 0) { - if (assumed_size != 0) { - /* some submirrors specified neither size or components */ - char *sizestr = NULL; - - (void) bytes_to_sizestr( - assumed_size, &sizestr, universal_units, B_FALSE); - - oprintf(OUTPUT_TERSE, - gettext(" *** warning: %d submirrors specified " - "neither size or components,\n" - " the assumed size is %s.\n"), - n_none, sizestr); - - free(sizestr); - - } else if (mirror_size == 0) { - volume_set_error( - gettext("no size specified for requested " - "mirror and no sizes/components " - "specified for its submirrors.")); - - error = -1; - } - } - - dlist_free_items(submirs_with_size, NULL); - dlist_free_items(submirs_with_comps, NULL); - dlist_free_items(submirs_with_nothing, NULL); - - } - - return (error); -} - -/* - * FUNCTION: validate_submirror_size_and_components( - * devconfig_t *submir, - * uint64_t mirror_size, - * uint64_t *assumed_size, - * dlist_t **submirs_with_size, - * dlist_t **submirs_with_comps, - * dlist_t **submirs_no_size_or_comps) - * - * INPUT: submir - a specific submirror request - * mirror_size, - the size specified for the mirror - * - * OUTPUT: assumed_size - the assumed size of the mirror, - * if none specified. - * submirs_with_size - pointer to a list of submirror - * requests that specify a size - * submirs_with_comps - pointer to a list of submirror - * requests that specify components - * submirs_no_size_or_comps - pointer to a list of - * submirror requests that specify neither - * a size or components - * - * RETURNS: int - 0 if the requested submirrors - * pass validation - * !0 otherwise. - * - * PURPOSE: Validation function which checks a specific submirror - * request's size and components against the parent mirror's - * size. - * - * - any submirror request that specifies both size and - * components is invalid - * - any submirror request specifying a size different - * than that explictly requested for the mirror is - * invalid - * - a submirror request specifying a size < 512K is invalid. - * - any components specified for a submirror are validated. - * - * If the submirror passes the validation checks, it is added - * to the appropriate output list. - * - * If the input mirror_size is 0 and the submirror specifies - * a valid size, the submirror size is returned as the - * assumed_size for the mirror. - */ -static int -validate_submirror_size_and_components( - devconfig_t *submir, - uint64_t mirror_size, - uint64_t *assumed_size, - dlist_t **submirs_with_size, - dlist_t **submirs_with_comps, - dlist_t **submirs_no_size_or_comps) -{ - uint64_t submir_size = 0; - component_type_t submir_type = TYPE_UNKNOWN; - char *submir_typestr = NULL; - dlist_t *submir_comps = NULL; - dlist_t *item = NULL; - int n_submir_comps = 0; - int error = 0; - - submir_comps = devconfig_get_components(submir); - if (submir_comps != NULL) { - n_submir_comps = dlist_length(submir_comps); - } - - if ((error = devconfig_get_size(submir, &submir_size)) != 0) { - if (error == ERR_ATTR_UNSET) { - /* submirror size not specified */ - error = 0; - submir_size = 0; - } - } - - if (error != 0) { - return (error); - } - - /* submirror type previously validated */ - (void) devconfig_get_type(submir, &submir_type); - submir_typestr = devconfig_type_to_str(submir_type); - - if (submir_size == 0) { - - /* submirror has no size, components? */ - if (n_submir_comps > 0) { - - /* validate components */ - error = validate_slice_components(submir, submir_type); - - item = dlist_new_item((void *)submir); - if (item == NULL) { - error = ENOMEM; - } else { - *submirs_with_comps = - dlist_append(item, *submirs_with_comps, AT_TAIL); - } - - } else { - - /* no size or components */ - item = dlist_new_item((void *)submir); - if (item == NULL) { - error = ENOMEM; - } else { - *submirs_no_size_or_comps = - dlist_append(item, *submirs_no_size_or_comps, AT_TAIL); - } - - } - - } else { - - /* submirror has size, check it */ - if (error == 0) { - error = validate_minimum_size(submir_size); - } - - /* check size against mirror's size */ - if ((error == 0) && (submir_size != mirror_size)) { - - if (mirror_size != 0) { - - /* sizes differ */ - char *sizestr = NULL; - char *mstr = NULL; - - (void) bytes_to_sizestr( - submir_size, &sizestr, universal_units, B_FALSE); - (void) bytes_to_sizestr( - mirror_size, &mstr, universal_units, B_FALSE); - - volume_set_error( - gettext("the requested submirror size (%s) " - "differs from the requested mirror " - "size (%s).\n"), - sizestr, mstr); - - error = -1; - - free(sizestr); - free(mstr); - - } else if (*assumed_size == 0) { - - /* first size assumed as mirror size */ - char *sizestr = NULL; - - (void) bytes_to_sizestr( - submir_size, &sizestr, universal_units, B_FALSE); - - oprintf(OUTPUT_TERSE, - gettext(" *** warning, using first " - "explicit submirror size (%s)\n" - " as the mirror size\n"), - sizestr); - - *assumed_size = submir_size; - - free(sizestr); - - } else if (submir_size != *assumed_size) { - - /* submirror sizes differ */ - char *sizestr1 = NULL; - char *sizestr2 = NULL; - - (void) bytes_to_sizestr( - submir_size, &sizestr1, universal_units, B_FALSE); - (void) bytes_to_sizestr( - *assumed_size, &sizestr2, universal_units, B_FALSE); - - volume_set_error( - gettext("submirror specifies different " - "size (%s) than a previous " - "submirror (%s)\n"), - sizestr1, sizestr2); - - free(sizestr1); - free(sizestr2); - - error = -1; - } - } - - if ((error == 0) && (n_submir_comps > 0)) { - - /* size and subcomponents specified */ - char *sizestr = NULL; - - (void) bytes_to_sizestr( - submir_size, &sizestr, universal_units, B_FALSE); - - volume_set_error( - gettext("%s submirror specifies both an " - "explicit size (%s) and components.\n"), - submir_typestr, sizestr); - - free(sizestr); - error = -1; - - } - - if (error == 0) { - item = dlist_new_item((void *)submir); - if (item == NULL) { - error = ENOMEM; - } else { - *submirs_with_size = - dlist_append(item, *submirs_with_size, AT_TAIL); - } - } - } - - return (error); -} - - -/* - * FUNCTION: validate_slice_components(devconfig_t *req, - * component_type_t type) - * - * INPUT: req - the request - * type - the type of volume being requested - * - * SIDEEFFECT: if the slice component is otherwise valid, an attempt is made - * to reserve it. - * - * RETURNS: int - 0 if the request passes slice component validation - * !0 otherwise. - * - * PURPOSE: Validation function for a concat, stripe or HSP request's - * explicitly specified slice components. - * - * Is the component slice a known device - * Is the component slice available - * Is the component slice already reserved - * - * If the request is for a stripe or concat and the - * request specifies an explicit size, it cannot also - * specify component slices. This is a validation failure. - * - * If the request is for a stripe, the number of specified - * slice components must agree with any expilcit specification - * of the minimum or maximum number of components the stripe - * should have. - */ -static int -validate_slice_components( - devconfig_t *req, - component_type_t type) -{ - dlist_t *list = NULL; - dlist_t *iter = NULL; - int error = 0; - int ncomp = 0; - - char *dsname = get_request_diskset(); - char *voltype = devconfig_type_to_str(type); - - list = devconfig_get_components(req); - - for (iter = list; (iter != NULL) && (error == 0); iter = iter->next) { - - devconfig_t *comp = (devconfig_t *)iter->obj; - component_type_t ctype = TYPE_UNKNOWN; - char *cname = NULL; - dm_descriptor_t slice = (dm_descriptor_t)0; - - if ((error = devconfig_get_type(comp, &ctype)) != 0) { - volume_set_error( - gettext("error getting requested component type."), - voltype); - - continue; - } - - if ((error = devconfig_get_name(comp, &cname)) != 0) { - volume_set_error( - gettext("error getting requested component name.")); - - continue; - } - - if (cname == NULL || cname[0] == '\0') { - volume_set_error( - gettext("%s requested component has no name."), - voltype); - - error = -1; - continue; - } - - if (ctype == TYPE_SLICE) { - - boolean_t in_set = B_FALSE; - boolean_t is_avail = B_FALSE; - boolean_t is_rsvd = B_FALSE; - dm_descriptor_t disk = (dm_descriptor_t)0; - - /* is the slice known and explicitly available? */ - if ((error = slice_is_available(cname, req, - &is_avail)) != 0) { - - if (error == ENODEV) { - volume_set_error( - gettext("%s requested component does not " - "exist: \"%s\"."), - voltype, cname); - error = -1; - } - continue; - } - - if (is_avail != B_TRUE) { - volume_set_error( - gettext("%s requested component is " - "unavailable: \"%s\"."), - voltype, cname); - - error = -1; - continue; - } - - /* get slice and its disk */ - ((error = slice_get_by_name(cname, &slice)) != 0) || - (error = slice_get_disk(slice, &disk)) || - (error = is_reserved_slice(slice, &is_rsvd)) || - (error = is_disk_in_diskset(disk, dsname, &in_set)); - if (error != 0) { - continue; - } - - /* is disk in the set? */ - if (in_set != B_TRUE) { - volume_set_error( - gettext("%s specifies a component not in " - "disk set \"%s\": \"%s\"."), - voltype, dsname, cname); - - error = -1; - continue; - } - - /* was slice specified in some other request? */ - if (is_rsvd == B_TRUE) { - /* include aliases in the error */ - char *aliases = - get_device_aliases_string((dm_descriptor_t)slice); - - if (aliases[0] != NULL) { - volume_set_error( - gettext("%s specifies a previously used " - "component: \"%s\" " - "(aliases: %s).\n"), - voltype, cname, aliases); - } else { - volume_set_error( - gettext("%s specifies a previously used " - "component: \"%s\"\n"), - voltype, cname); - } - - error = -1; - continue; - } - - /* component is ok, reserve it */ - error = add_reserved_slice(slice); - - /* - * the reserved slice component still needs to be - * checked against slices in use by SVM, but that - * information isn't available yet: the usable - * slice derivation happens after validation. - * - * validate_reserved_slices() can be used to check - * them once the usable slices are determined. - */ - - } else { - volume_set_error( - gettext("%s requested component has illegal type."), - voltype); - - error = -1; - continue; - } - } - - if (error != 0) { - return (error); - } - - ncomp = dlist_length(list); - if ((ncomp > 0) && (type == TYPE_CONCAT || type == TYPE_STRIPE)) { - /* explicit size requested for the stripe/concat? */ - uint64_t size = 0; - if ((error = devconfig_get_size(req, &size)) != 0) { - if (error == ERR_ATTR_UNSET) { - error = 0; - } - } else { - /* size and components both specified */ - char *sizestr = NULL; - - (void) bytes_to_sizestr( - size, &sizestr, universal_units, B_FALSE); - - volume_set_error( - gettext("%s specifies both an explicit size (%s) " - "and components."), - voltype, sizestr); - - free(sizestr); - error = -1; - } - } - - if (error != 0) { - return (error); - } - - if ((ncomp > 0) && (type == TYPE_STRIPE)) { - /* does # of components agree with min & max comps? */ - uint16_t min = 0; - uint16_t max = 0; - if ((error = devconfig_get_stripe_mincomp(req, &min)) != 0) { - if (error == ERR_ATTR_UNSET) { - /* min comp not requested */ - error = 0; - } else { - /* error getting requested mincomp */ - return (error); - } - - } else if (ncomp < min) { - /* specified comps < requested mincomp */ - volume_set_error( - gettext("%s specifies fewer components (%d) than the " - "minimum number requested (%d).\n"), - voltype, ncomp, min); - - error = -1; - return (error); - } - - if ((error = devconfig_get_stripe_maxcomp(req, &max)) != 0) { - if (error == ERR_ATTR_UNSET) { - /* max comp not requested */ - error = 0; - } else { - /* error getting request maxcomp */ - return (error); - } - } else if (ncomp > max) { - /* specified comps > requested maxcomp */ - volume_set_error( - gettext("%s specifies more components (%d) than the " - "maximum number requested (%d).\n"), - voltype, ncomp, max); - error = -1; - return (error); - } - } - - return (error); -} - -/* - * Generate a list of known aliases for the input descriptor. - * - * The returned string buffer is in the form: "alias1", "alias2"... - */ -static char * -get_device_aliases_string( - dm_descriptor_t desc) -{ - static char buf[BUFSIZ]; - dlist_t *aliases = NULL; - dlist_t *iter = NULL; - - buf[0] = '\0'; - (void) get_aliases(desc, &aliases); - for (iter = aliases; iter != NULL; iter = iter->next) { - if (*buf == '\0') { - (void) snprintf(buf, BUFSIZ-1, "\"%s\"", (char *)iter->obj); - } else { - char tmp[BUFSIZ]; - (void) strcpy(buf, tmp); - (void) snprintf(buf, BUFSIZ-1, "%s, \"%s\"", - tmp, (char *)iter->obj); - } - } - dlist_free_items(aliases, free); - - return (buf); -} diff --git a/usr/src/cmd/lvm/metassist/layout/layout_validate.h b/usr/src/cmd/lvm/metassist/layout/layout_validate.h deleted file mode 100644 index f4af5e7839fe..000000000000 --- a/usr/src/cmd/lvm/metassist/layout/layout_validate.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LAYOUT_VALIDATE_H -#define _LAYOUT_VALIDATE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "volume_devconfig.h" - -extern int validate_request(devconfig_t *req); - -extern int validate_request_sizes(devconfig_t *req); - -extern int validate_request_avail_unavail(devconfig_t *req); - -extern int validate_reserved_slices(); - -extern int release_validation_caches(); - -extern int validate_basic_svm_config(); - -#ifdef __cplusplus -} -#endif - -#endif /* _LAYOUT_VALIDATE_H */ diff --git a/usr/src/cmd/lvm/metassist/scripts/Makefile b/usr/src/cmd/lvm/metassist/scripts/Makefile deleted file mode 100644 index 92eff55f20c7..000000000000 --- a/usr/src/cmd/lvm/metassist/scripts/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -METASSIST_TOPLEVEL = .. - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -SHFILES = errifoutput - -CLEANFILES += $(SHFILES) - -all: $(SHFILES) - -include $(METASSIST_TOPLEVEL)/Makefile.targ diff --git a/usr/src/cmd/lvm/metassist/scripts/errifoutput.sh b/usr/src/cmd/lvm/metassist/scripts/errifoutput.sh deleted file mode 100644 index cab32d4ab3ec..000000000000 --- a/usr/src/cmd/lvm/metassist/scripts/errifoutput.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Runs the command passed as arguments, echoes the output to stderr. -# -# Exits with 0 (success) if the command exits with 0 and has no -# output. -# -# Exits with 1 (failure) if the command exits with 0 and has output. -# -# Exits with the exit code of the command if it exits with a non-zero -# exit code. - -output=`"$@" 2>&1` -result=$? - -if [ -n "$output" ] -then - echo "$output" >&2 - test $result = 0 && result=1 -fi - -exit $result diff --git a/usr/src/cmd/lvm/metassist/sysfiles/Makefile b/usr/src/cmd/lvm/metassist/sysfiles/Makefile deleted file mode 100644 index c6b297cecb0b..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -METASSIST_TOPLEVEL = .. - -# Files to be copied to /etc/default -DEFAULTFILES = \ - metassist.xml.dfl - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -# Files to be copied to /usr/share/lib/xml/dtd -DTDFILES = \ - volume-config.dtd \ - volume-defaults.dtd \ - volume-request.dtd - -# Files to be copied to /usr/share/lib/xml/style -STYLEFILES = \ - volume-command.xsl - -include $(METASSIST_TOPLEVEL)/Makefile.targ diff --git a/usr/src/cmd/lvm/metassist/sysfiles/metassist.xml.dfl b/usr/src/cmd/lvm/metassist/sysfiles/metassist.xml.dfl deleted file mode 100644 index 3fae19817418..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/metassist.xml.dfl +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - - diff --git a/usr/src/cmd/lvm/metassist/sysfiles/volume-command.xsl b/usr/src/cmd/lvm/metassist/sysfiles/volume-command.xsl deleted file mode 100644 index 1d669ab4201c..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/volume-command.xsl +++ /dev/null @@ -1,706 +0,0 @@ - - - - - - - - en - - - - - - - - - - - #!/bin/sh - -# -# - - Environment - - - -# - -# - - Amend PATH - - - -PATH="/usr/sbin:/usr/bin:$PATH" -export PATH - -# - - Disk set name - - - - - diskset=' - - - - ' - - -# -# - - Functions - - - -# - -# - - Echo (verbose) and exec given command, exit on error - - - -execho () { - test -n "$verbose" && echo "$@" - "$@" || exit -} - -# - - Get full /dev/rdsk path of given slice - - - -fullpath () { - case "$1" in - /dev/dsk/*|/dev/did/dsk/*) echo "$1" | sed 's/dsk/rdsk/' ;; - /*) echo "$1" ;; - *) echo /dev/rdsk/"$1" ;; - esac -} - -# - - Run fmthard, ignore partboot error, error if output - - - -fmthard_special () { - ignore='Error writing partboot' - out=`fmthard "$@" 2>&1` - result=$? - echo "$out" | - case "$out" in - *"$ignore"*) grep -v "$ignore"; return 0 ;; - '') return "$result" ;; - *) cat; return 1 ;; - esac >&2 -} - -# -# - - Main - - - -# - -# - - Verify root - - - -if [ "`id | sed 's/^[^(]*(\([^)]*\).*/\1/'`" != root ] -then - echo " - - - This script must be run as root. - - - " >&2 - exit 1; -fi - -# - - Check for verbose option - - - -case "$1" in - -v) verbose=1 ;; - *) verbose= ;; -esac - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - Does the disk set exist? - - - -if metaset -s "$diskset" >/dev/null 2>&1 -then - # - - Take control of disk set - - - - execho metaset -s "$diskset" -t -else - # - - Create the disk set - - - - autotakeargs= - /usr/sbin/clinfo || autotakeargs='-A enable' - execho metaset -s "$diskset" $autotakeargs -a -h `uname -n | cut -f1 -d.` -fi - - - - - - - - # - - Add disks to set - - - - - - - execho metaset -s "$diskset" -a {1} - - - - - - - - - - - - # - - Format slices - - - - - - - - - - execho fmthard_special -d {1}:{5}:0:{2}:{3} `fullpath {4}` - - - - - - - - - 0 - - - - - - - - - - - - 0 - - 4 - - - - - - - - - - - - - - - - # - - - - Create {1} {2} - - - - - - - - - - - - - - # - - - - Does {1} exist? - - - - - - - - metahs -s "$diskset" -i {1} >/dev/null 2>&1 || { - - - - - - - - - - - execho metainit -s "$diskset" {1} - - - - } # - - - - Add slices to {1} - - - - - - - - - - execho metahs -s "$diskset" -a {1} {2} - - - - - - - - - - - - - - - - - - - - - - execho metainit -s "$diskset" - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - # - - - - Associate {1} {2} with hot spare pool {3} - - - - - - - - - - execho metaparam -s "$diskset" -h {1} {2} - - - - - - - - - - - - - - - - - - - - - execho metainit -s "$diskset" {1} -m {2} - - - - - - - - - -g - - - - - -r - - - - - - -S - - - - - - - - - - - - - - - execho metattach -s "$diskset" {1} {2} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {1} - - - - {2} - - - - {3} - - - - {4} - - - - {5} - - - - - - - Beispielanzeige - - - diff --git a/usr/src/cmd/lvm/metassist/sysfiles/volume-config.dtd b/usr/src/cmd/lvm/metassist/sysfiles/volume-config.dtd deleted file mode 100644 index 979e6e0e59a6..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/volume-config.dtd +++ /dev/null @@ -1,163 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/metassist/sysfiles/volume-defaults.dtd b/usr/src/cmd/lvm/metassist/sysfiles/volume-defaults.dtd deleted file mode 100644 index 42e023e11607..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/volume-defaults.dtd +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/metassist/sysfiles/volume-request.dtd b/usr/src/cmd/lvm/metassist/sysfiles/volume-request.dtd deleted file mode 100644 index f4b3f9e68f6f..000000000000 --- a/usr/src/cmd/lvm/metassist/sysfiles/volume-request.dtd +++ /dev/null @@ -1,388 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/metassist/xml/Makefile b/usr/src/cmd/lvm/metassist/xml/Makefile deleted file mode 100644 index 6d2a3217973e..000000000000 --- a/usr/src/cmd/lvm/metassist/xml/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -METASSIST_TOPLEVEL = .. - -SRCS = xml_convert.c -OBJS = $(SRCS:%.c=%.o) -HDRS = $(SRCS:%.c=%.h) -MSGFILES = $(SRCS:%.c=%.i) - -include $(METASSIST_TOPLEVEL)/../../Makefile.cmd -include $(METASSIST_TOPLEVEL)/Makefile.env - -INCLUDES += -I$(ADJUNCT_PROTO)/usr/include/libxml2 -I../common -CFLAGS += $(INCLUDES) - -POFILE = xmlp.po - -include $(METASSIST_TOPLEVEL)/Makefile.targ - -# Build .po file from message files -$(POFILE): $(MSGFILES) - $(BUILDPO.msgfiles) - -cstyle: - $(CSTYLE) $(CSTYLE_FLAGS) $(SRCS) $(HDRS) - -hdrchk: - $(HDRCHK) $(HDRCHK_FLAGS) $(HDRS) diff --git a/usr/src/cmd/lvm/metassist/xml/xml_convert.c b/usr/src/cmd/lvm/metassist/xml/xml_convert.c deleted file mode 100644 index fb2ef57e60eb..000000000000 --- a/usr/src/cmd/lvm/metassist/xml/xml_convert.c +++ /dev/null @@ -1,2311 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "xml_convert.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "volume_error.h" -#include "volume_output.h" -#include "volume_string.h" - -/* - * IDs for localized messages in the generated command script - */ - -#define CMD_MSG_ENVIRONMENT "Environment" -#define CMD_MSG_AMEND_PATH "Amend PATH" -#define CMD_MSG_DISK_SET_NAME "Disk set name" -#define CMD_MSG_FUNCTIONS "Functions" -/* CSTYLED */ -#define CMD_MSG_ECHO_AND_EXEC "Echo (verbose) and exec given command, exit on error" -#define CMD_MSG_GET_FULL_PATH "Get full /dev/rdsk path of given slice" -/* CSTYLED */ -#define CMD_MSG_FMTHARD_SPECIAL "Run fmthard, ignore partboot error, error if output" -#define CMD_MSG_MAIN "Main" -#define CMD_MSG_VERIFY_ROOT "Verify root" -#define CMD_MSG_RUN_AS_ROOT "This script must be run as root." -#define CMD_MSG_CHECK_FOR_VERBOSE "Check for verbose option" -#define CMD_MSG_DOES_DISK_SET_EXIST "Does the disk set exist?" -#define CMD_MSG_TAKE_DISK_SET "Take control of disk set" -#define CMD_MSG_CREATE_THE_DISK_SET "Create the disk set" -#define CMD_MSG_ADD_DISKS_TO_SET "Add disks to set" -#define CMD_MSG_FORMAT_SLICES "Format slices" -#define CMD_MSG_CREATE "Create {1} {2}" -#define CMD_MSG_DOES_EXIST "Does {1} exist?" -#define CMD_MSG_ADD_SLICES_TO "Add slices to {1}" -/* CSTYLED */ -#define CMD_MSG_ASSOCIATE_WITH_HSP "Associate {1} {2} with hot spare pool {3}" - -/* - * ****************************************************************** - * - * Data types - * - * ****************************************************************** - */ - -/* - * Encapsulates the parsing of an XML attribute - */ -typedef struct { - - /* The name of the attribute */ - char *name; - - /* - * A function to validate and set the XML attribute value in - * the given devconfig_t structure. - * - * @param name - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 if the given value was valid and set - * successfully, non-zero otherwise. - */ - int (*validate_set)(devconfig_t *device, char *name, char *value); - - /* - * A function to get the XML attribute value in the given - * devconfig_t structure. - * - * @param name - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 if the given value was retrieved - * successfully, non-zero otherwise. - */ - int (*get_as_string)(devconfig_t *device, char *name, char **value); -} attr_t; - -/* - * Encapsulates the parsing of an XML element - */ -typedef struct { - /* The name of the element */ - char *name; - - /* The type of element to set in the devconfig_t */ - component_type_t type; - - /* - * When converting from XML to a devconfig_t hierarchy, - * indicates whether to create a new devconfig_t structure in - * the hierarchy when this XML element is encountered. - */ - boolean_t is_hierarchical; - - /* - * If is_hierarchical is B_TRUE, whether to use an existing - * devconfig_t structure of this type when this element is - * encountered - */ - boolean_t singleton; - - /* The valid XML attributes for this element */ - attr_t *attributes; -} element_t; - -typedef struct { - char *msgid; - char *message; -} l10nmessage_t; - -/* - * ****************************************************************** - * - * Function prototypes - * - * ****************************************************************** - */ - -static int validate_doc(xmlDocPtr doc, const char *name, const char *systemID); -static int devconfig_to_xml( - xmlNodePtr parent, element_t elements[], devconfig_t *device); -static int xml_to_devconfig( - xmlNodePtr cur, element_t elements[], devconfig_t *device); -static int compare_is_a_diskset(void *obj1, void *obj2); -static xmlNodePtr xml_find_node( - xmlNodePtr node, xmlChar *element, xmlChar *name); -static xmlDocPtr create_localized_message_doc(); -static int create_localized_message_file(char **tmpfile); -static int strtobool(char *str, boolean_t *value); -static int ofprintf_terse(void *unused, char *fmt, ...); -static int ofprintf_verbose(void *unused, char *fmt, ...); - -static int validate_set_size( - devconfig_t *volume, char *attr, char *value); -static int validate_set_size_in_blocks( - devconfig_t *slice, char *attr, char *value); -static int validate_set_diskset_name( - devconfig_t *diskset, char *attr, char *name); -static int validate_add_available_name( - devconfig_t *device, char *attr, char *name); -static int validate_add_unavailable_name( - devconfig_t *device, char *attr, char *name); -static int validate_set_hsp_name( - devconfig_t *hsp, char *attr, char *name); -static int validate_set_disk_name( - devconfig_t *disk, char *attr, char *name); -static int validate_set_slice_name( - devconfig_t *slice, char *attr, char *name); -static int validate_set_slice_start_block( - devconfig_t *slice, char *attr, char *value); -static int validate_set_volume_name( - devconfig_t *volume, char *attr, char *name); -static int validate_set_stripe_interlace( - devconfig_t *stripe, char *attr, char *value); -static int validate_set_stripe_mincomp( - devconfig_t *stripe, char *attr, char *value); -static int validate_set_stripe_maxcomp( - devconfig_t *stripe, char *attr, char *value); -static int validate_set_volume_usehsp( - devconfig_t *volume, char *attr, char *value); -static int validate_set_mirror_nsubmirrors( - devconfig_t *mirror, char *attr, char *value); -static int validate_set_mirror_read( - devconfig_t *mirror, char *attr, char *value); -static int validate_set_mirror_write( - devconfig_t *mirror, char *attr, char *value); -static int validate_set_mirror_passnum( - devconfig_t *mirror, char *attr, char *value); -static int validate_set_volume_redundancy( - devconfig_t *volume, char *attr, char *value); -static int validate_set_volume_datapaths( - devconfig_t *volume, char *attr, char *value); - -static int get_as_string_name( - devconfig_t *device, char *attr, char **value); -static int get_as_string_mirror_passnum( - devconfig_t *mirror, char *attr, char **value); -static int get_as_string_mirror_read( - devconfig_t *mirror, char *attr, char **value); -static int get_as_string_mirror_write( - devconfig_t *mirror, char *attr, char **value); -static int get_as_string_size_in_blocks( - devconfig_t *device, char *attr, char **value); -static int get_as_string_slice_start_block( - devconfig_t *slice, char *attr, char **value); -static int get_as_string_stripe_interlace( - devconfig_t *stripe, char *attr, char **value); - -/* - * ****************************************************************** - * - * Data - * - * ****************************************************************** - */ - -/* Valid units for the size attribute */ -units_t size_units[] = { - {UNIT_KILOBYTES, BYTES_PER_KILOBYTE}, - {UNIT_MEGABYTES, BYTES_PER_MEGABYTE}, - {UNIT_GIGABYTES, BYTES_PER_GIGABYTE}, - {UNIT_TERABYTES, BYTES_PER_TERABYTE}, - {NULL, 0} -}; - -/* Valid units for the interlace attribute */ -units_t interlace_units[] = { - {UNIT_BLOCKS, BYTES_PER_BLOCK}, - {UNIT_KILOBYTES, BYTES_PER_KILOBYTE}, - {UNIT_MEGABYTES, BYTES_PER_MEGABYTE}, - {NULL, 0} -}; - -/* attributes */ -static attr_t diskset_attrs[] = { - { ATTR_NAME, validate_set_diskset_name, get_as_string_name }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t available_attrs[] = { - { ATTR_NAME, validate_add_available_name, NULL }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t unavailable_attrs[] = { - { ATTR_NAME, validate_add_unavailable_name, NULL }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t hsp_attrs[] = { - { ATTR_NAME, validate_set_hsp_name, get_as_string_name }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t disk_attrs[] = { - { ATTR_NAME, validate_set_disk_name, get_as_string_name }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t slice_attrs[] = { - { ATTR_NAME, validate_set_slice_name, get_as_string_name }, - { ATTR_SIZEINBLOCKS, validate_set_size_in_blocks, - get_as_string_size_in_blocks }, - { ATTR_SLICE_STARTSECTOR, validate_set_slice_start_block, - get_as_string_slice_start_block }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t stripe_attrs[] = { - { ATTR_NAME, validate_set_volume_name, get_as_string_name }, - { ATTR_SIZEINBYTES, validate_set_size, NULL }, - { ATTR_STRIPE_MINCOMP, validate_set_stripe_mincomp, NULL }, - { ATTR_STRIPE_MAXCOMP, validate_set_stripe_maxcomp, NULL }, - { ATTR_STRIPE_INTERLACE, validate_set_stripe_interlace, - get_as_string_stripe_interlace }, - { ATTR_VOLUME_USEHSP, validate_set_volume_usehsp, NULL }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t concat_attrs[] = { - { ATTR_NAME, validate_set_volume_name, get_as_string_name }, - { ATTR_SIZEINBYTES, validate_set_size, NULL }, - { ATTR_VOLUME_USEHSP, validate_set_volume_usehsp, NULL }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t mirror_attrs[] = { - { ATTR_NAME, validate_set_volume_name, get_as_string_name }, - { ATTR_MIRROR_NSUBMIRRORS, validate_set_mirror_nsubmirrors, NULL }, - { ATTR_SIZEINBYTES, validate_set_size, NULL }, - { ATTR_MIRROR_READ, validate_set_mirror_read, - get_as_string_mirror_read }, - { ATTR_MIRROR_WRITE, validate_set_mirror_write, - get_as_string_mirror_write }, - { ATTR_MIRROR_PASSNUM, validate_set_mirror_passnum, - get_as_string_mirror_passnum }, - { ATTR_VOLUME_USEHSP, validate_set_volume_usehsp, NULL }, - { NULL, NULL, NULL } -}; - -/* attributes */ -static attr_t volume_attrs[] = { - { ATTR_NAME, validate_set_volume_name, get_as_string_name }, - { ATTR_SIZEINBYTES, validate_set_size, NULL }, - { ATTR_VOLUME_REDUNDANCY, validate_set_volume_redundancy, NULL }, - { ATTR_VOLUME_FAULTRECOVERY, validate_set_volume_usehsp, NULL }, - { ATTR_VOLUME_DATAPATHS, validate_set_volume_datapaths, NULL }, - { NULL, NULL, NULL } -}; - -/* volume-request elements */ -static element_t request_elements[] = { - { ELEMENT_DISKSET, TYPE_DISKSET, B_FALSE, B_FALSE, diskset_attrs }, - { ELEMENT_AVAILABLE, TYPE_UNKNOWN, B_FALSE, B_FALSE, available_attrs }, - { ELEMENT_UNAVAILABLE, TYPE_UNKNOWN, B_FALSE, B_FALSE, - unavailable_attrs }, - { ELEMENT_HSP, TYPE_HSP, B_TRUE, B_FALSE, hsp_attrs }, - { ELEMENT_SLICE, TYPE_SLICE, B_TRUE, B_FALSE, slice_attrs }, - { ELEMENT_STRIPE, TYPE_STRIPE, B_TRUE, B_FALSE, stripe_attrs }, - { ELEMENT_CONCAT, TYPE_CONCAT, B_TRUE, B_FALSE, concat_attrs }, - { ELEMENT_MIRROR, TYPE_MIRROR, B_TRUE, B_FALSE, mirror_attrs }, - { ELEMENT_VOLUME, TYPE_VOLUME, B_TRUE, B_FALSE, volume_attrs }, - { NULL, NULL, B_FALSE, B_FALSE, NULL } -}; - -/* volume-defaults elements */ -static element_t default_elements[] = { - { ELEMENT_DISKSET, TYPE_DISKSET, B_TRUE, B_FALSE, diskset_attrs }, - { ELEMENT_AVAILABLE, TYPE_UNKNOWN, B_FALSE, B_TRUE, available_attrs }, - { ELEMENT_UNAVAILABLE, TYPE_UNKNOWN, B_FALSE, B_TRUE, - unavailable_attrs }, - { ELEMENT_HSP, TYPE_HSP, B_TRUE, B_TRUE, hsp_attrs }, - { ELEMENT_SLICE, TYPE_SLICE, B_TRUE, B_TRUE, slice_attrs }, - { ELEMENT_STRIPE, TYPE_STRIPE, B_TRUE, B_TRUE, stripe_attrs }, - { ELEMENT_CONCAT, TYPE_CONCAT, B_TRUE, B_TRUE, concat_attrs }, - { ELEMENT_MIRROR, TYPE_MIRROR, B_TRUE, B_TRUE, mirror_attrs }, - { ELEMENT_VOLUME, TYPE_VOLUME, B_TRUE, B_TRUE, volume_attrs }, - { NULL, NULL, B_FALSE, B_FALSE, NULL } -}; - -/* volume-config elements */ -static element_t config_elements[] = { - { ELEMENT_DISKSET, TYPE_DISKSET, B_FALSE, B_FALSE, diskset_attrs }, - { ELEMENT_DISK, TYPE_DRIVE, B_TRUE, B_FALSE, disk_attrs }, - { ELEMENT_SLICE, TYPE_SLICE, B_TRUE, B_FALSE, slice_attrs }, - { ELEMENT_HSP, TYPE_HSP, B_TRUE, B_FALSE, hsp_attrs }, - { ELEMENT_STRIPE, TYPE_STRIPE, B_TRUE, B_FALSE, stripe_attrs }, - { ELEMENT_CONCAT, TYPE_CONCAT, B_TRUE, B_FALSE, concat_attrs }, - { ELEMENT_MIRROR, TYPE_MIRROR, B_TRUE, B_FALSE, mirror_attrs }, - { NULL, NULL, B_FALSE, B_FALSE, NULL } -}; - -/* - * ****************************************************************** - * - * External functions - * - * ****************************************************************** - */ - -/* - * Initialize the XML parser, setting defaults across all XML - * routines. - */ -void -init_xml() -{ - /* COMPAT: Do not generate nodes for formatting spaces */ - LIBXML_TEST_VERSION - xmlKeepBlanksDefault(0); - - /* Turn on line numbers for debugging */ - xmlLineNumbersDefault(1); - - /* Substitute entities as files are parsed */ - xmlSubstituteEntitiesDefault(1); - - /* Don't load external entity subsets */ - xmlLoadExtDtdDefaultValue = 0; - - /* Don't validate against DTD by default */ - xmlDoValidityCheckingDefaultValue = 0; - - /* Set up output handlers for XML parsing */ - xmlDefaultSAXHandler.warning = (warningSAXFunc)ofprintf_verbose; - xmlDefaultSAXHandler.error = (errorSAXFunc)ofprintf_terse; - xmlDefaultSAXHandler.fatalError = (fatalErrorSAXFunc)ofprintf_terse; -} - -/* - * Clean up any remaining structures before exiting. - */ -void -cleanup_xml() -{ - xsltCleanupGlobals(); - xmlCleanupParser(); -} - -/* - * Converts a volume-request XML document into a request_t. - * - * @param doc - * an existing volume-request XML document - * - * @param request - * RETURN: a new request_t which must be freed via - * free_request - * - * @return 0 on success, non-zero otherwise. - */ -int -xml_to_request( - xmlDocPtr doc, - request_t **request) -{ - int error = 0; - - *request = NULL; - - /* Validate doc against known DTD */ - if ((error = validate_doc( - doc, ELEMENT_VOLUMEREQUEST, VOLUME_REQUEST_DTD_LOC)) == 0) { - - /* Create a request */ - if ((error = new_request(request)) == 0) { - - /* Convert the XML doc into a request_t */ - error = xml_to_devconfig(xmlDocGetRootElement(doc), - request_elements, request_get_diskset_req(*request)); - } - } - - return (error); -} - -/* - * Converts a volume-defaults XML document into a defaults_t. - * - * @param doc - * an existing volume-defaults XML document - * - * @param defaults - * RETURN: a new defaults_t which must be freed via - * free_defaults - * - * @return 0 on success, non-zero otherwise. - */ -int -xml_to_defaults( - xmlDocPtr doc, - defaults_t **defaults) -{ - int error = 0; - - *defaults = NULL; - - /* Validate doc against known DTD */ - if ((error = validate_doc(doc, ELEMENT_VOLUMEDEFAULTS, - VOLUME_DEFAULTS_DTD_LOC)) == 0) { - - /* Create request defaults */ - if ((error = new_defaults(defaults)) == 0) { - - devconfig_t *global; - - /* Get defaults for all disk sets */ - if ((error = defaults_get_diskset_by_name( - *defaults, NULL, &global)) == 0) { - - /* Populate the global devconfig_t from the XML doc */ - if ((error = xml_to_devconfig(xmlDocGetRootElement(doc), - default_elements, global)) == 0) { - - /* Get the components of the global devconfig_t */ - dlist_t *list = devconfig_get_components(global); - - /* - * Move all named disk set settings out from - * under global settings - */ - /* CONSTANTCONDITION */ - while (1) { - dlist_t *removed = NULL; - devconfig_t *component; - - /* Remove named disk set from under global */ - list = dlist_remove_equivalent_item( - list, NULL, compare_is_a_diskset, &removed); - - if (removed == NULL) { - /* No named disk set found */ - break; - } - - component = removed->obj; - - /* Append named disk set to disk set list */ - defaults_set_disksets(*defaults, - dlist_append(dlist_new_item(component), - defaults_get_disksets(*defaults), AT_TAIL)); - } - } - } - } - } - - return (error); -} - -/* - * Converts a volume-config XML document into a devconfig_t. - * - * @param doc - * an existing volume-config XML document - * - * @param config - * RETURN: a new devconfig_t which must be freed via - * free_devconfig - * - * @return 0 on success, non-zero otherwise. - */ -int -xml_to_config( - xmlDocPtr doc, - devconfig_t **config) -{ - int error = 0; - - *config = NULL; - - /* Validate doc against known DTD */ - if ((error = validate_doc( - doc, ELEMENT_VOLUMECONFIG, VOLUME_CONFIG_DTD_LOC)) == 0) { - - /* Create a devconfig_t */ - if ((error = new_devconfig(config, TYPE_DISKSET)) == 0) { - - /* Populate the devconfig_t from the XML doc */ - error = xml_to_devconfig( - xmlDocGetRootElement(doc), config_elements, *config); - } - } - - return (error); -} - -/* - * Converts a devconfig_t into a volume-config XML document. - * - * @param config - * an existing devconfig_t representing a volume - * configuration. - * - * @param doc - * RETURN: a new volume-config XML document which must be - * freed via xmlFreeDoc - * - * @return 0 on success, non-zero otherwise. - */ -int -config_to_xml( - devconfig_t *config, - xmlDocPtr *doc) -{ - xmlNodePtr root; - int error = 0; - - /* Create the XML document */ - *doc = xmlNewDoc((xmlChar *)"1.0"); - - /* Create the root node */ - root = xmlNewDocNode( - *doc, NULL, (xmlChar *)ELEMENT_VOLUMECONFIG, NULL); - xmlAddChild((xmlNodePtr)*doc, (xmlNodePtr)root); - - /* Create sub-nodes from the config devconfig_t */ - if ((error = devconfig_to_xml(root, config_elements, config)) == 0) { - - /* Add DTD node and validate */ - error = validate_doc( - *doc, ELEMENT_VOLUMECONFIG, VOLUME_CONFIG_DTD_LOC); - } - - if (error) { - xmlFreeDoc(*doc); - } - - return (error); -} - -/* - * Converts a volume-config XML document into a Bourne shell script. - * - * @param doc - * an existing volume-config XML document - * - * @param commands - * RETURN: a new char* which must be freed - * - * @return 0 on success, non-zero otherwise. - */ -int -xml_to_commands( - xmlDocPtr doc, - char **commands) -{ - char *tmpfile = NULL; - int error = 0; - xsltStylesheetPtr style = NULL; - - /* Read in XSL stylesheet as a normal XML document */ - xmlDocPtr xsl_doc = xmlSAXParseFile((xmlSAXHandlerPtr) - &xmlDefaultSAXHandler, VOLUME_COMMAND_XSL_LOC, 0); - - if (xsl_doc != NULL && xsl_doc->xmlChildrenNode != NULL) { - - /* - * Find the "msgfile" variable node. This is where - * we'll set the location of the file we'll create - * containing the localized messages. - */ - xmlNodePtr msgfile_node = xml_find_node( - xmlDocGetRootElement(xsl_doc), (xmlChar *)ELEMENT_VARIABLE, - (xmlChar *)NAME_L10N_MESSAGE_FILE); - - /* - * Find the "lang" node. This is where we'll set the - * current locale. - */ - xmlNodePtr lang_node = xml_find_node(xmlDocGetRootElement(xsl_doc), - (xmlChar *)ELEMENT_PARAM, (xmlChar *)NAME_LANG); - - /* - * Ignore if the nodes are not found -- the script - * will default to the C locale. - */ - if (msgfile_node != NULL && lang_node != NULL) { - /* Get/set current locale in the "lang" node */ - char *locale = setlocale(LC_MESSAGES, NULL); - xmlNodeSetContent(lang_node, (xmlChar *)locale); - - /* Write localized messages to a temporary file */ - if ((error = create_localized_message_file(&tmpfile)) == 0) { - - char *newsel; - - /* Clear current value of select attribute, if any */ - xmlChar *cursel = xmlGetProp( - msgfile_node, (xmlChar *)ATTR_SELECT); - if (cursel != NULL) { - xmlFree(cursel); - } - - /* - * The select attribute calls the XSLT function - * document() to load an external XML file - */ - newsel = stralloccat(3, "document('", tmpfile, "')"); - - if (newsel == NULL) { - volume_set_error(gettext("out of memory")); - error = -1; - } else { - - /* Set the new value of the select attribute */ - xmlSetProp(msgfile_node, - (xmlChar *)ATTR_SELECT, (xmlChar *)newsel); - - free(newsel); - } - } - } - - if (error == 0) { - style = xsltParseStylesheetDoc(xsl_doc); - } - } - - if (style == NULL) { - volume_set_error( - gettext("could not load stylesheet from %s"), - VOLUME_COMMAND_XSL_LOC); - error = -1; - } else { - - xmlDocPtr result = xsltApplyStylesheet(style, doc, NULL); - - if (result == NULL) { - volume_set_error( - gettext("could not apply stylesheet to volume-config")); - error = -1; - } else { - int length; - - if (xsltSaveResultToString((xmlChar **)commands, - &length, result, style) == -1) { - error = ENOMEM; - } - } - - xsltFreeStylesheet(style); - } - - if (tmpfile != NULL) { - /* Ignore failure */ - unlink(tmpfile); - - free(tmpfile); - } - - return (error); -} - -/* - * ****************************************************************** - * - * Static functions - * - * ****************************************************************** - */ - -/* - * Sets the external DTD node in the given XML document and then - * validates it. - * - * @param doc - * an existing XML document - * - * @param name - * the expected root element name of the XML document - * - * @param systemID - * the location of the DTD - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_doc( - xmlDocPtr doc, - const char *name, - const char *systemID) -{ - xmlValidCtxt context; - xmlDtdPtr dtd; - - if (doc == NULL) { - volume_set_error(gettext("NULL %s document"), name); - return (-1); - } - - /* - * Assume that we can't trust any DTD but our own. - */ - - /* Was a DTD (external or internal) included in the document? */ - if ((dtd = xmlGetIntSubset(doc)) != NULL) { - /* Remove the DTD node */ - oprintf(OUTPUT_DEBUG, gettext("Removing DTD from %s\n"), name); - xmlUnlinkNode((xmlNodePtr)dtd); - xmlFreeDtd(dtd); - } - - /* Create the (external) DTD node */ - oprintf(OUTPUT_DEBUG, - gettext("Creating new external DTD for %s\n"), name); - dtd = xmlCreateIntSubset( - doc, (xmlChar *)name, NULL, (xmlChar *)systemID); - if (dtd == NULL) { - volume_set_error( - gettext("could not create DTD node from %s"), systemID); - return (-1); - } - - /* Validate against DTD */ - oprintf(OUTPUT_DEBUG, gettext("Validating %s against DTD\n"), name); - context.userData = NULL; - context.error = (xmlValidityErrorFunc)ofprintf_terse; - context.warning = (xmlValidityWarningFunc)ofprintf_terse; - if (!xmlValidateDocument(&context, doc)) { - volume_set_error(gettext("invalid %s"), name); - return (-1); - } - - return (0); -} - -/* - * Converts a devconfig_t into an XML node subject to the rules in - * the given element_t array. - * - * @param parent - * the XML node to which to add new XML nodes resulting - * from conversion of the given devconfig_t - * - * @param elements - * the element_ts that describe the structure of the XML - * document and govern the conversion of the given - * devconfig_t - * - * @param device - * the devconfig_t to convert - * - * @return 0 on success, non-zero otherwise. - */ -static int -devconfig_to_xml( - xmlNodePtr parent, - element_t elements[], - devconfig_t *device) -{ - int i; - int error = 0; - xmlNodePtr node = NULL; - - /* Get device type */ - component_type_t type; - if ((error = devconfig_get_type(device, &type)) != 0) { - return (error); - } - - /* Search for this element definition */ - for (i = 0; elements[i].name != NULL; i++) { - element_t *element = &(elements[i]); - - if (element->type == type) { - int j; - char **array; - dlist_t *components; - - oprintf(OUTPUT_DEBUG, gettext("Element: %s\n"), - devconfig_type_to_str(type)); - - /* Create the XML node */ - node = xmlNewChild( - parent, NULL, (xmlChar *)element->name, NULL); - - /* For each attribute defined for this element... */ - for (j = 0; element->attributes[j].name != NULL; j++) { - attr_t *attribute = &(element->attributes[j]); - char *value; - - /* Is there a valid accessor for this attribute? */ - if (attribute->get_as_string != NULL) { - - /* Get the attribute value from the device */ - switch (error = attribute->get_as_string( - device, attribute->name, &value)) { - - /* Attribute is set in this device */ - case 0: - oprintf(OUTPUT_DEBUG, " %s: %s\n", - attribute->name, value); - - /* Set the value in the XML node */ - xmlSetProp(node, (uchar_t *)attribute->name, - (uchar_t *)value); - free(value); - - /* FALLTHROUGH */ - - /* Attribute is not set in this device */ - case ERR_ATTR_UNSET: - - error = 0; - break; - - /* Error */ - default: - return (error); - } - } - } - - /* Is this node hierarchical? */ - if (element->is_hierarchical == B_FALSE) { - node = parent; - } - - /* Create nodes */ - array = devconfig_get_available(device); - if (array != NULL) { - for (j = 0; array[j] != NULL; j++) { - xmlNodePtr child = xmlNewChild( - node, NULL, (xmlChar *)ELEMENT_AVAILABLE, NULL); - xmlSetProp(child, - (xmlChar *)ATTR_NAME, (xmlChar *)array[j]); - } - } - - /* Create nodes */ - array = devconfig_get_unavailable(device); - if (array != NULL) { - for (j = 0; array[j] != NULL; j++) { - xmlNodePtr child = xmlNewChild( - node, NULL, (xmlChar *)ELEMENT_UNAVAILABLE, NULL); - xmlSetProp(child, - (xmlChar *)ATTR_NAME, (xmlChar *)array[j]); - } - } - - /* - * Recursively convert subcomponents of this device to - * XML, taking care to encode them in the order - * specified in the element_t list (which should - * mirror what's expected by the DTD). - */ - - /* For each element type... */ - for (j = 0; elements[j].name != NULL; j++) { - - /* For each component of this device... */ - for (components = devconfig_get_components(device); - components != NULL && error == 0; - components = components->next) { - - devconfig_t *component = (devconfig_t *)components->obj; - component_type_t t; - - /* Are the types the same? */ - if ((error = devconfig_get_type(component, &t)) != 0) { - return (error); - } else { - if (elements[j].type == t) { - /* Encode child */ - error = devconfig_to_xml( - node, elements, component); - } - } - } - } - - /* Element found */ - break; - } - } - - /* Was this device successfully converted? */ - if (node == NULL) { - volume_set_error( - gettext("can't convert device of type \"%s\" to XML element"), - devconfig_type_to_str(type)); - error = -1; - } - - return (error); -} - -/* - * Converts an XML node into a devconfig_t subject to the rules in - * the given element_t array. - * - * @param cure - * the existing XML node to convert - * - * @param elements - * the element_ts that describe the structure of the XML - * document and govern the conversion of the given XML - * node - * - * @param device - * the devconfig_t node to which to add new devconfig_ts - * resulting from conversion of the given XML node - * - * @return 0 on success, non-zero otherwise. - */ -static int -xml_to_devconfig( - xmlNodePtr cur, - element_t elements[], - devconfig_t *device) -{ - int error = 0; - - /* For each child node... */ - for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) { - int i; - boolean_t parsed_elem = B_FALSE; - - /* Search for this element definition */ - for (i = 0; elements[i].name != NULL; i++) { - element_t *element = &(elements[i]); - - if (xmlStrcmp(cur->name, (xmlChar *)element->name) == 0) { - int j; - devconfig_t *component = NULL; - - /* Flag that this element has been parsed */ - parsed_elem = B_TRUE; - - oprintf(OUTPUT_DEBUG, gettext("line %d: Element <%s>\n"), - XML_GET_LINE(cur), cur->name); - - /* Should a new device be created for this element? */ - if (element->is_hierarchical == B_TRUE) { - - /* Should we use an existing device of this type? */ - if (element->singleton) { - devconfig_get_component( - device, element->type, &component, B_FALSE); - } - - if (component == NULL) { - oprintf(OUTPUT_DEBUG, - gettext("Creating new device\n")); - - /* Create device of this type */ - if ((error = new_devconfig( - &component, element->type)) != 0) { - return (error); - } - - /* Add component to the toplevel device */ - devconfig_set_components( - device, dlist_append(dlist_new_item(component), - devconfig_get_components(device), AT_TAIL)); - } - } else { - component = device; - } - - /* For each attribute defined for this element... */ - for (j = 0; element->attributes[j].name != NULL; j++) { - attr_t *attribute = &(element->attributes[j]); - - /* Get the value of this attribute */ - char *value = (char *) - xmlGetProp(cur, (xmlChar *)attribute->name); - - /* Was this attribute specified? */ - if (value != NULL) { - oprintf(OUTPUT_DEBUG, - gettext("line %d:\tAttribute %s=%s\n"), - XML_GET_LINE(cur), attribute->name, value); - - /* Set this value in the device */ - if ((error = attribute->validate_set( - component, attribute->name, value)) != 0) { - return (error); - } - } - } - - /* Get recursive sub-elements */ - if ((error = xml_to_devconfig( - cur, elements, component)) != 0) { - return (error); - } - - /* Element found */ - break; - } - } - - - /* Make sure all non-text/comment elements were parsed */ - if (parsed_elem == B_FALSE && - xmlStrcmp(cur->name, (xmlChar *)ELEMENT_TEXT) != 0 && - xmlStrcmp(cur->name, (xmlChar *)ELEMENT_COMMENT) != 0) { - - oprintf(OUTPUT_DEBUG, gettext("Element <%s> NOT PARSED!!!\n"), - cur->name); - } - } - - return (0); -} - -/* - * Returns 0 if obj2 (devconfig_t *) is a disk set, 1 otherwise. - */ -static int -compare_is_a_diskset( - void *obj1, - void *obj2) -{ - return (devconfig_isA( - (devconfig_t *)obj2, TYPE_DISKSET) == B_TRUE ? 0 : 1); -} - -/* - * Recursively searches the given xmlNodePtr for an element of the - * specified type and name. - * - * @param node - * the root node to search - * - * @param element - * the name of the element type - * - * @param name - * the value of the name attribute - * - * @return a valid xmlNodePtr if an element of the specified - * type and name was found, NULL otherwise. - */ -static xmlNodePtr -xml_find_node( - xmlNodePtr node, - xmlChar *element, - xmlChar *name) -{ - xmlNodePtr child; - - /* Is the element the right type? */ - if (xmlStrcmp(element, node->name) == 0 && - - /* Does this element's name attribute match? */ - xmlStrcmp(name, xmlGetProp(node, (xmlChar *)ATTR_NAME)) == 0) { - - return (node); - } - - /* Check child nodes */ - for (child = node->xmlChildrenNode; child != NULL; - child = child->next) { - xmlNodePtr found = xml_find_node(child, element, name); - - if (found != NULL) { - return (found); - } - } - - return (NULL); -} - -/* - * Creates an XML document containing all of the localized message - * strings for the generated command script. - * - * @return a xmlDocPtr which must be freed via xmlFreeDoc - */ -static xmlDocPtr -create_localized_message_doc() -{ - int i; - char *locale; - xmlDocPtr doc; - xmlNodePtr root; - l10nmessage_t _cmd_messages[21]; - - /* Create the XML document */ - doc = xmlNewDoc((xmlChar *)"1.0"); - - /* Create the root node */ - root = xmlNewDocNode( - doc, NULL, (xmlChar *)ELEMENT_L10N, NULL); - xmlAddChild((xmlNodePtr) doc, (xmlNodePtr)root); - - _cmd_messages[0].msgid = CMD_MSG_ENVIRONMENT; - _cmd_messages[0].message = gettext(CMD_MSG_ENVIRONMENT); - _cmd_messages[1].msgid = CMD_MSG_AMEND_PATH; - _cmd_messages[1].message = gettext(CMD_MSG_AMEND_PATH); - _cmd_messages[2].msgid = CMD_MSG_DISK_SET_NAME; - _cmd_messages[2].message = gettext(CMD_MSG_DISK_SET_NAME); - _cmd_messages[3].msgid = CMD_MSG_FUNCTIONS; - _cmd_messages[3].message = gettext(CMD_MSG_FUNCTIONS); - _cmd_messages[4].msgid = CMD_MSG_ECHO_AND_EXEC; - _cmd_messages[4].message = gettext(CMD_MSG_ECHO_AND_EXEC); - _cmd_messages[5].msgid = CMD_MSG_FMTHARD_SPECIAL; - _cmd_messages[5].message = gettext(CMD_MSG_FMTHARD_SPECIAL); - _cmd_messages[6].msgid = CMD_MSG_GET_FULL_PATH; - _cmd_messages[6].message = gettext(CMD_MSG_GET_FULL_PATH); - _cmd_messages[7].msgid = CMD_MSG_MAIN; - _cmd_messages[7].message = gettext(CMD_MSG_MAIN); - _cmd_messages[8].msgid = CMD_MSG_VERIFY_ROOT; - _cmd_messages[8].message = gettext(CMD_MSG_VERIFY_ROOT); - _cmd_messages[9].msgid = CMD_MSG_RUN_AS_ROOT; - _cmd_messages[9].message = gettext(CMD_MSG_RUN_AS_ROOT); - _cmd_messages[10].msgid = CMD_MSG_CHECK_FOR_VERBOSE; - _cmd_messages[10].message = gettext(CMD_MSG_CHECK_FOR_VERBOSE); - _cmd_messages[11].msgid = (CMD_MSG_DOES_DISK_SET_EXIST); - _cmd_messages[11].message = gettext(CMD_MSG_DOES_DISK_SET_EXIST); - _cmd_messages[12].msgid = (CMD_MSG_TAKE_DISK_SET); - _cmd_messages[12].message = gettext(CMD_MSG_TAKE_DISK_SET); - _cmd_messages[13].msgid = (CMD_MSG_CREATE_THE_DISK_SET); - _cmd_messages[13].message = gettext(CMD_MSG_CREATE_THE_DISK_SET); - _cmd_messages[14].msgid = (CMD_MSG_ADD_DISKS_TO_SET); - _cmd_messages[14].message = gettext(CMD_MSG_ADD_DISKS_TO_SET); - _cmd_messages[15].msgid = (CMD_MSG_FORMAT_SLICES); - _cmd_messages[15].message = gettext(CMD_MSG_FORMAT_SLICES); - _cmd_messages[16].msgid = (CMD_MSG_CREATE); - _cmd_messages[16].message = gettext(CMD_MSG_CREATE); - _cmd_messages[17].msgid = (CMD_MSG_DOES_EXIST); - _cmd_messages[17].message = gettext(CMD_MSG_DOES_EXIST); - _cmd_messages[18].msgid = (CMD_MSG_ADD_SLICES_TO); - _cmd_messages[18].message = gettext(CMD_MSG_ADD_SLICES_TO); - _cmd_messages[19].msgid = (CMD_MSG_ASSOCIATE_WITH_HSP); - _cmd_messages[19].message = gettext(CMD_MSG_ASSOCIATE_WITH_HSP); - _cmd_messages[20].msgid = NULL; - - /* Get/set current locale in the "lang" node */ - locale = setlocale(LC_MESSAGES, NULL); - - /* Add localized elements to stylesheet */ - for (i = 0; _cmd_messages[i].msgid != NULL; i++) { - xmlNsPtr ns = xmlNewNs(NULL, NULL, NULL); - - xmlNodePtr node = xmlNewTextChild( - root, ns, (xmlChar *)ELEMENT_MESSAGE, - (xmlChar *)_cmd_messages[i].message); - /* Lang attribute */ - xmlSetProp(node, - (xmlChar *)ATTR_LANG, (xmlChar *)locale); - - /* Message ID attribute */ - xmlSetProp(node, (xmlChar *)ATTR_MESSAGEID, - (xmlChar *)_cmd_messages[i].msgid); - } - - if (get_max_verbosity() >= OUTPUT_DEBUG) { - xmlChar *text; - /* Get the text dump */ - xmlDocDumpFormatMemory(doc, &text, NULL, 1); - oprintf(OUTPUT_DEBUG, - gettext("Generated message file:\n%s"), text); - xmlFree(text); - } - - return (doc); -} - -/* - * Creates a temporary XML file containing all of the localized - * message strings for the generated command script. - * - * @param tmpfile - * RETURN: the name of the temporary XML file - * - * @return 0 on success, non-zero otherwise. - */ -static int -create_localized_message_file( - char **tmpfile) -{ - int error = 0; - - /* - * Create temporary file name -- "XXXXXX" is replaced with - * unique char sequence by mkstemp() - */ - *tmpfile = stralloccat(3, "/tmp/", ELEMENT_L10N, "XXXXXX"); - - if (*tmpfile == NULL) { - volume_set_error(gettext("out of memory")); - error = -1; - } else { - int fildes; - FILE *msgfile = NULL; - - /* Open temp file */ - if ((fildes = mkstemp(*tmpfile)) != -1) { - msgfile = fdopen(fildes, "w"); - } - - if (msgfile == NULL) { - volume_set_error(gettext( - "could not open file for writing: %s"), *tmpfile); - error = -1; - } else { - - xmlChar *text; - xmlDocPtr message_doc = create_localized_message_doc(); - xmlDocDumpFormatMemory(message_doc, &text, NULL, 1); - - if (fprintf(msgfile, "%s", text) < 0) { - volume_set_error(gettext( - "could not create localized message file: %s"), - *tmpfile); - error = -1; - } - - xmlFree(text); - xmlFreeDoc(message_doc); - } - - fclose(msgfile); - } - - return (error); -} - -/* - * Converts the given string into a boolean. The string must be - * either VALID_ATTR_TRUE or VALID_ATTR_FALSE. - * - * @param str - * the string to convert - * - * @param bool - * the addr of the boolean_t - * - * @return 0 if the given string could be converted to a boolean - * non-zero otherwise. - */ -static int -strtobool( - char *str, - boolean_t *value) -{ - int error = 0; - - if (strcmp(str, VALID_ATTR_TRUE) == 0) { - *value = B_TRUE; - } else - - if (strcmp(str, VALID_ATTR_FALSE) == 0) { - *value = B_FALSE; - } else - - error = -1; - - return (error); -} - -/* - * Wrapper for oprintf with a OUTPUT_TERSE level of verbosity. - * Provides an fprintf-like syntax to enable use as substitute output - * handler for man of the XML commands. - * - * @param unused - * unused, in favor of the FILE* passed to - * set_max_verbosity(). - * - * @param fmt - * a printf-style format string - * - * @return the number of characters output - */ -static int -ofprintf_terse( - void *unused, - char *fmt, - ...) -{ - int ret; - va_list ap; - - va_start(ap, fmt); - ret = oprintf_va(OUTPUT_TERSE, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * Wrapper for oprintf with a OUTPUT_VERBOSE level of verbosity. - * Provides an fprintf-like syntax to enable use as substitute output - * handler for man of the XML commands. - * - * @param unused - * unused, in favor of the FILE* passed to - * set_max_verbosity(). - * - * @param fmt - * a printf-style format string - * - * @return the number of characters output - */ -static int -ofprintf_verbose( - void *unused, - char *fmt, - ...) -{ - int ret; - va_list ap; - - va_start(ap, fmt); - ret = oprintf_va(OUTPUT_VERBOSE, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * ****************************************************************** - * - * XML attribute validators/mutators - * - * These functions convert the given XML attribute string to the - * appropriate data type, and then pass it on to the appropriate - * devconfig_t mutator. A non-zero status is returned if the given - * string could not be converted or was invalid. - * - * ****************************************************************** - */ - -/* - * Validate and set the size attribute in the given volume - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the size - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_size( - devconfig_t *volume, - char *attr, - char *value) -{ - int error; - uint64_t size = 0; - - /* Convert size string to bytes */ - if ((error = sizestr_to_bytes(value, &size, size_units)) != 0) { - return (error); - } - - /* Set size in volume */ - return (devconfig_set_size(volume, size)); -} - -/* - * Validate and set the size_in_blocks attribute in the given slice - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the size_in_blocks - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_size_in_blocks( - devconfig_t *slice, - char *attr, - char *value) -{ - long long size; - - /* Convert string to long long */ - if (sscanf(value, "%lld", &size) != 1) { - volume_set_error(gettext("%s: invalid size in blocks"), value); - return (-1); - } - - /* Set the number of submirrors in the slice */ - return (devconfig_set_size_in_blocks(slice, (uint64_t)size)); -} - -/* - * Validate and set the name attribute in the given diskset - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the name - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_diskset_name( - devconfig_t *diskset, - char *attr, - char *name) -{ - return (devconfig_set_diskset_name(diskset, name)); -} - -/* - * Validate and add the given name to the list of available devices in - * the given volume devconfig_t. - * - * @param device - * the devconfig_t whose available device list to modify - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_add_available_name( - devconfig_t *device, - char *attr, - char *name) -{ - char **available; - - /* Get available devices for this device */ - available = devconfig_get_available(device); - - /* Try to add name to array via realloc */ - if ((available = append_to_string_array(available, name)) == NULL) { - return (ENOMEM); - } - - /* Set available devices in the device */ - devconfig_set_available(device, available); - - return (0); -} - -/* - * Validate and add the given name to the list of unavailable devices - * in the given volume devconfig_t. - * - * @param device - * the devconfig_t whose unavailable device list to modify - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_add_unavailable_name( - devconfig_t *device, - char *attr, - char *name) -{ - char **unavailable; - - /* Get unavailable devices for this device */ - unavailable = devconfig_get_unavailable(device); - - /* Try to add name to array via realloc */ - if ((unavailable = append_to_string_array(unavailable, name)) == NULL) { - return (ENOMEM); - } - - /* Set unavailable devices in the device */ - devconfig_set_unavailable(device, unavailable); - - return (0); -} - -/* - * Validate and set the name attribute in the given hsp devconfig_t. - * - * @param volume - * the devconfig_t in which to set the name - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_hsp_name( - devconfig_t *hsp, - char *attr, - char *name) -{ - return (devconfig_set_hsp_name(hsp, name)); -} - -/* - * Validate and set the name attribute in the given disk devconfig_t. - * - * @param volume - * the devconfig_t in which to set the name - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_disk_name( - devconfig_t *disk, - char *attr, - char *name) -{ - return (devconfig_set_name(disk, name)); -} - -/* - * Validate and set the name attribute in the given slice devconfig_t. - * - * @param volume - * the devconfig_t in which to set the name - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_slice_name( - devconfig_t *slice, - char *attr, - char *name) -{ - return (devconfig_set_name(slice, name)); -} - -/* - * Validate and set the start_block attribute in the given slice - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the start_block - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_slice_start_block( - devconfig_t *slice, - char *attr, - char *value) -{ - long long startsector; - - /* Convert string to long long */ - if (sscanf(value, "%lld", &startsector) != 1) { - volume_set_error(gettext("%s: invalid start sector"), value); - return (-1); - } - - /* Set the number of submirrors in the slice */ - return (devconfig_set_slice_start_block(slice, (uint64_t)startsector)); -} - -/* - * Validate and set the name attribute in the given volume - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the name - * - * @param attr - * the name of the XML attribute - * - * @param name - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_volume_name( - devconfig_t *volume, - char *attr, - char *name) -{ - return (devconfig_set_volume_name(volume, name)); -} - -/* - * Validate and set the interlace attribute in the given stripe - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the interlace - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_stripe_interlace( - devconfig_t *stripe, - char *attr, - char *value) -{ - int error; - uint64_t interlace = 0; - - /* Convert interlace string to bytes */ - if ((error = sizestr_to_bytes( - value, &interlace, interlace_units)) != 0) { - return (error); - } - - /* Set interlace in stripe */ - return (devconfig_set_stripe_interlace(stripe, interlace)); -} - -/* - * Validate and set the mincomp attribute in the given stripe - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the mincomp - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_stripe_mincomp( - devconfig_t *stripe, - char *attr, - char *value) -{ - uint16_t mincomp; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &mincomp) != 0) { - volume_set_error( - gettext("invalid minimum stripe components (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_stripe_mincomp(stripe, mincomp)); -} - -/* - * Validate and set the maxcomp attribute in the given stripe - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the maxcomp - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_stripe_maxcomp( - devconfig_t *stripe, - char *attr, - char *value) -{ - uint16_t maxcomp; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &maxcomp) != 0) { - volume_set_error( - gettext("invalid maximum stripe components (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_stripe_maxcomp(stripe, maxcomp)); -} - -/* - * Validate and set the usehsp attribute in the given volume - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the usehsp - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_volume_usehsp( - devconfig_t *volume, - char *attr, - char *value) -{ - boolean_t usehsp; - - /* Get boolean value */ - if (strtobool(value, &usehsp) != 0) { - volume_set_error( - gettext("%s: invalid boolean value for \"%s\" attribute"), - value, attr); - return (-1); - } - - /* Set in volume */ - return (devconfig_set_volume_usehsp(volume, usehsp)); -} - -/* - * Validate and set the nsubmirrors attribute in the given mirror - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the nsubmirrors - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_mirror_nsubmirrors( - devconfig_t *mirror, - char *attr, - char *value) -{ - uint16_t nsubmirrors; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &nsubmirrors) != 0) { - volume_set_error( - gettext("invalid number of submirrors (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_mirror_nsubs(mirror, nsubmirrors)); -} - -/* - * Validate and set the read attribute in the given mirror - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the read - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_mirror_read( - devconfig_t *mirror, - char *attr, - char *value) -{ - mirror_read_strategy_t strategy; - - if (strcmp(value, VALID_MIRROR_READ_ROUNDROBIN) == 0) { - strategy = MIRROR_READ_ROUNDROBIN; - } else - - if (strcmp(value, VALID_MIRROR_READ_GEOMETRIC) == 0) { - strategy = MIRROR_READ_GEOMETRIC; - } else - - if (strcmp(value, VALID_MIRROR_READ_FIRST) == 0) { - strategy = MIRROR_READ_FIRST; - } else - - { - volume_set_error(gettext("%s: invalid mirror read value"), value); - return (-1); - } - - return (devconfig_set_mirror_read(mirror, strategy)); -} - -/* - * Validate and set the write attribute in the given mirror - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the write - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_mirror_write( - devconfig_t *mirror, - char *attr, - char *value) -{ - mirror_write_strategy_t strategy; - - if (strcmp(value, VALID_MIRROR_WRITE_PARALLEL) == 0) { - strategy = MIRROR_WRITE_PARALLEL; - } else - - if (strcmp(value, VALID_MIRROR_WRITE_SERIAL) == 0) { - strategy = MIRROR_WRITE_SERIAL; - } else - - { - volume_set_error(gettext("%s: invalid mirror write value"), value); - return (-1); - } - - return (devconfig_set_mirror_write(mirror, strategy)); -} - -/* - * Validate and set the passnum attribute in the given mirror - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the passnum - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_mirror_passnum( - devconfig_t *mirror, - char *attr, - char *value) -{ - uint16_t passnum; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &passnum) != 0) { - volume_set_error( - gettext("invalid mirror pass number (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_mirror_pass(mirror, passnum)); -} - -/* - * Validate and set the redundancy attribute in the given volume - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the redundancy - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_volume_redundancy( - devconfig_t *volume, - char *attr, - char *value) -{ - uint16_t redundancy; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &redundancy) != 0) { - volume_set_error( - gettext("invalid redundancy level (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_volume_redundancy_level(volume, redundancy)); -} - -/* - * Validate and set the datapaths attribute in the given volume - * devconfig_t. - * - * @param volume - * the devconfig_t in which to set the datapaths - * - * @param attr - * the name of the XML attribute - * - * @param value - * the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -validate_set_volume_datapaths( - devconfig_t *volume, - char *attr, - char *value) -{ - uint16_t redundancy; - - /* Convert string to a uint16_t */ - if (str_to_uint16(value, &redundancy) != 0) { - volume_set_error( - gettext("invalid number of data paths (%s): %s"), - attr, value); - return (-1); - } - - /* Set in stripe */ - return (devconfig_set_volume_npaths(volume, redundancy)); -} - -/* - * ****************************************************************** - * - * XML attribute accessors/converters - * - * These functions get a value from the appropriate devconfig_t - * accessor, and then convert it to a string. - * - * ****************************************************************** - */ - -/* - * Get, as a string, the value of the name attribute of the given - * devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the name - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_name( - devconfig_t *device, - char *attr, - char **value) -{ - int error; - char *name; - - /* Get name */ - if ((error = devconfig_get_name(device, &name)) == 0) { - if ((*value = strdup(name)) == NULL) { - error = ENOMEM; - } - } - - return (error); -} - -/* - * Get, as a string, the value of the passnum attribute of the given - * mirror devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the passnum - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_mirror_passnum( - devconfig_t *mirror, - char *attr, - char **value) -{ - int error; - uint16_t passnum; - - /* Get mirror pass number */ - if ((error = devconfig_get_mirror_pass(mirror, &passnum)) == 0) { - error = ll_to_str(passnum, value); - } - - return (error); -} - -/* - * Get, as a string, the value of the read attribute of the given - * mirror devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the read - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_mirror_read( - devconfig_t *mirror, - char *attr, - char **value) -{ - int error; - mirror_read_strategy_t read; - - /* Get mirror read strategy */ - if ((error = devconfig_get_mirror_read(mirror, &read)) == 0) { - if ((*value = strdup( - devconfig_read_strategy_to_str(read))) == NULL) { - error = ENOMEM; - } - } - - return (error); -} - -/* - * Get, as a string, the value of the write attribute of the given - * mirror devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the write - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_mirror_write( - devconfig_t *mirror, - char *attr, - char **value) -{ - int error; - mirror_write_strategy_t write; - - /* Get mirror write strategy */ - if ((error = devconfig_get_mirror_write(mirror, &write)) == 0) { - if ((*value = strdup( - devconfig_write_strategy_to_str(write))) == NULL) { - error = ENOMEM; - } - } - - return (error); -} - -/* - * Get, as a string, the value of the in_blocks attribute of the given - * device devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the in_blocks - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_size_in_blocks( - devconfig_t *device, - char *attr, - char **value) -{ - int error; - uint64_t size; - - /* Get size in blocks */ - if ((error = devconfig_get_size_in_blocks(device, &size)) == 0) { - error = ll_to_str(size, value); - } - - return (error); -} - -/* - * Get, as a string, the value of the start_block attribute of the - * given slice devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the start_block - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_slice_start_block( - devconfig_t *slice, - char *attr, - char **value) -{ - int error; - uint64_t start; - - /* Get slice start block */ - if ((error = devconfig_get_slice_start_block(slice, &start)) == 0) { - error = ll_to_str(start, value); - } - - return (error); -} - -/* - * Get, as a string, the value of the interlace attribute of the given - * stripe devconfig_t. This data must be freed. - * - * @param device - * the devconfig_t from which to retrieve the interlace - * - * @param attr - * the name of the XML attribute - * - * @param value - * RETURN: the value of the XML attribute - * - * @return 0 on success, non-zero otherwise. - */ -static int -get_as_string_stripe_interlace( - devconfig_t *stripe, - char *attr, - char **value) -{ - int error; - uint64_t interlace; - - /* Get interlace */ - if ((error = devconfig_get_stripe_interlace( - stripe, &interlace)) == 0) { - error = bytes_to_sizestr(interlace, value, interlace_units, B_TRUE); - } - - return (error); -} diff --git a/usr/src/cmd/lvm/metassist/xml/xml_convert.h b/usr/src/cmd/lvm/metassist/xml/xml_convert.h deleted file mode 100644 index a6017111bcae..000000000000 --- a/usr/src/cmd/lvm/metassist/xml/xml_convert.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _XML_CONVERT_H -#define _XML_CONVERT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include "volume_request.h" -#include "volume_defaults.h" - -/* The location of the volume-request.dtd */ -#define VOLUME_REQUEST_DTD_LOC "/usr/share/lib/xml/dtd/volume-request.dtd" - -/* The location of the volume-request-defaults.dtd */ -#define VOLUME_DEFAULTS_DTD_LOC "/usr/share/lib/xml/dtd/volume-defaults.dtd" - -/* The location of the volume-config.dtd */ -#define VOLUME_CONFIG_DTD_LOC "/usr/share/lib/xml/dtd/volume-config.dtd" - -/* Location of the volume-command.xsl file */ -#define VOLUME_COMMAND_XSL_LOC "/usr/share/lib/xml/style/volume-command.xsl" - -/* - * Valid values for attributes - */ -#define VALID_ATTR_TRUE "TRUE" -#define VALID_ATTR_FALSE "FALSE" -#define VALID_MIRROR_READ_GEOMETRIC "GEOMETRIC" -#define VALID_MIRROR_READ_FIRST "FIRST" -#define VALID_MIRROR_READ_ROUNDROBIN "ROUNDROBIN" -#define VALID_MIRROR_WRITE_SERIAL "SERIAL" -#define VALID_MIRROR_WRITE_PARALLEL "PARALLEL" - -/* - * Standard units - */ -#define UNIT_BLOCKS "BLOCKS" -#define UNIT_KILOBYTES "KB" -#define UNIT_MEGABYTES "MB" -#define UNIT_GIGABYTES "GB" -#define UNIT_TERABYTES "TB" - -/* - * Initialize the XML parser, setting defaults across all XML - * routines. - */ -extern void init_xml(); - -/* - * Clean up any remaining structures before exiting. - */ -extern void cleanup_xml(); - -/* - * Converts a volume-request XML document into a request_t. - * - * @param doc - * an existing volume-request XML document - * - * @param request - * RETURN: a new request_t which must be freed via - * free_request - * - * @return 0 on success, non-zero otherwise. - */ -extern int xml_to_request(xmlDocPtr doc, request_t **request); - -/* - * Converts a volume-defaults XML document into a defaults_t. - * - * @param doc - * an existing volume-defaults XML document - * - * @param defaults - * RETURN: a new defaults_t which must be freed via - * free_defaults - * - * @return 0 on success, non-zero otherwise. - */ -extern int xml_to_defaults(xmlDocPtr doc, defaults_t **defaults); - -/* - * Converts a volume-config XML document into a devconfig_t. - * - * @param doc - * an existing volume-config XML document - * - * @param config - * RETURN: a new devconfig_t which must be freed via - * free_devconfig - * - * @return 0 on success, non-zero otherwise. - */ -extern int xml_to_config(xmlDocPtr doc, devconfig_t **config); - -/* - * Converts a devconfig_t into a volume-config XML document. - * - * @param config - * an existing devconfig_t representing a volume - * configuration. - * - * @param doc - * RETURN: a new volume-config XML document which must be - * freed via xmlFreeDoc - * - * @return 0 on success, non-zero otherwise. - */ -extern int config_to_xml(devconfig_t *config, xmlDocPtr *doc); - -/* - * Converts a volume-config XML document into a Bourne shell script. - * - * @param doc - * an existing volume-config XML document - * - * @param commands - * RETURN: a new char* which must be freed - * - * @return 0 on success, non-zero otherwise. - */ -extern int xml_to_commands(xmlDocPtr doc, char **commands); - -#ifdef __cplusplus -} -#endif - -#endif /* _XML_CONVERT_H */ diff --git a/usr/src/cmd/lvm/rpc.mdcommd/Makefile b/usr/src/cmd/lvm/rpc.mdcommd/Makefile deleted file mode 100644 index 47bd985d2087..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -MANIFEST= mdcomm.xml - -include ../../Makefile.cmd -include ../Makefile.lvm - -ROOTMANIFESTDIR= $(ROOTSVCNETWORKRPC) - -SUBDIRS= $(MACH) - -SRCS= mddoors.c mdmn_commd_server.c mdmn_commd_service.c mdmn_subr.c - -POFILES= $(SRCS:%.c=%.po) -POFILE= rpc.mdcommdp.po - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all: $(SUBDIRS) - -catalog: $(POFILE) - -$(POFILE): $(POFILES) - $(RM) $@ - cat $(POFILES) > $(POFILE) - -clean clobber lint: $(SUBDIRS) - -install: $(SUBDIRS) $(ROOTMANIFEST) - -check: $(CHKMANIFEST) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ diff --git a/usr/src/cmd/lvm/rpc.mdcommd/i386/Makefile b/usr/src/cmd/lvm/rpc.mdcommd/i386/Makefile deleted file mode 100644 index bce5b2fa00df..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/i386/Makefile +++ /dev/null @@ -1,107 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -COMMD = rpc.mdcommd -MDDOORS = mddoors - -PROG = $(COMMD) $(MDDOORS) - -DERIVED_FILES = mdmn_commd_xdr.c - -OBJECTS = mdmn_commd_server.o \ - mdmn_commd_service.o \ - mddoors.o - -COMMD_OBJECTS = mdmn_commd_server.o \ - mdmn_commd_service.o \ - mdmn_subr.o \ - mdmn_commd_xdr.o - -MDDOORS_OBJECTS = mddoors.o - -LINTOBJECTS = mdmn_commd_server.o mdmn_commd_service.o mdmn_subr.o mddoors.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -SRCS = $(OBJECTS:%.o=../%.c) -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -ROOTLIBLVM = $(ROOTLIB)/lvm - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta - -CFLAGS += $(DEFINES) -# -# -lint := LINTFLAGS += -m - -%_svc.c := RPCGENFLAGS += -K -1 - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(COMMD): $(DERIVED_FILES) $(COMMD_OBJECTS) - $(LINK.c) -o $@ $(COMMD_OBJECTS) $(LDLIBS) -lnsl - $(POST_PROCESS) - -$(MDDOORS): ../mddoors.c - $(COMPILE.c) ../mddoors.c - $(LINK.c) -o $@ $(MDDOORS_OBJECTS) $(LDLIBS) - $(POST_PROCESS) - -INSTPROGS = $(ROOTUSRSBIN)/$(COMMD) $(ROOTLIBLVM)/$(MDDOORS) -install: all $(ROOTLIBLVM) $(INSTPROGS) - -cstyle: - $(CSTYLE) $(SRCS) - -lint: - for f in $(LINTSRCS) ; do \ - $(LINT.c) $(LINTFLAGS) $$f ; \ - done - -clean: - $(RM) $(OBJECTS) $(DERIVED_FILES) *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -$(DERIVED_FILES): $(SRC)/uts/common/sys/lvm/mdmn_commd.x - $(RPCGEN) -c $(SRC)/uts/common/sys/lvm/mdmn_commd.x -o $@ - -$(ROOTLIBLVM)/%: % - $(INS.file) - -$(ROOTLIBLVM): - $(INS.dir) - diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mdcomm.xml b/usr/src/cmd/lvm/rpc.mdcommd/mdcomm.xml deleted file mode 100644 index 5c9762edf706..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mdcomm.xml +++ /dev/null @@ -1,121 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mddoors.c b/usr/src/cmd/lvm/rpc.mdcommd/mddoors.c deleted file mode 100644 index b8f09f6d11c9..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mddoors.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include - -static pid_t enter_daemon_lock(void); -static void exit_daemon_lock(void); -#define DAEMON_LOCK_FILE "/var/run/.mddoors.lock" - -static int hold_daemon_lock; -static const char *daemon_lock_file = DAEMON_LOCK_FILE; -static int daemon_lock_fd; - -void -daemon_cleanup() -{ - if (hold_daemon_lock) { - meta_mirror_resync_block_all(); - exit_daemon_lock(); - } -} - -/* - * Use an advisory lock to ensure that only one daemon process is - * active at any point in time. - */ -static pid_t -enter_daemon_lock(void) -{ - struct flock lock; - - daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644); - - if (daemon_lock_fd < 0) { - exit(-1); - } - - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - - if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) { - - if (errno == EAGAIN || errno == EDEADLK) { - - if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) { - exit(1); - } - return (lock.l_pid); - } - } - hold_daemon_lock = 1; - return (getpid()); -} - - -/* - * Drop the advisory daemon lock, close lock file - */ -static void -exit_daemon_lock(void) -{ - struct flock lock; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - - if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) { - syslog(LOG_DAEMON | LOG_DEBUG, gettext("unlock(%s) - %s"), - daemon_lock_file, strerror(errno)); - return; - } - - if (close(daemon_lock_fd) == -1) { - syslog(LOG_DAEMON | LOG_DEBUG, - gettext("close(%s) failed - %s\n"), - daemon_lock_file, strerror(errno)); - return; - } - (void) unlink(daemon_lock_file); -} - -/* - * Purpose of this routine is to accept a message from the local kernel and - * send this message using rpc to the master node. - * when an ok comes from the master we call door_return() - */ - -/* ARGSUSED */ -static void -door2rpc(void *cookie, /* required by the doors infrastructure */ - char *argp, - size_t arg_size, /* required by the doors infrastructure */ - door_desc_t *dp, /* required by the doors infrastructure */ - uint_t n_desc) /* required by the doors infrastructure */ -{ - int err; - int size; - md_error_t ep = mdnullerror; - md_mn_result_t *result = NULL; - md_mn_kresult_t kresult; - - md_mn_kmsg_t *kmsg = (md_mn_kmsg_t *)(void *)argp; - err = mdmn_send_message(kmsg->kmsg_setno, kmsg->kmsg_type, - kmsg->kmsg_flags, kmsg->kmsg_recipient, (char *)&(kmsg->kmsg_data), - kmsg->kmsg_size, &result, &ep); - - if (result == NULL) { - kresult.kmmr_comm_state = MDMNE_RPC_FAIL; - } else { - kresult.kmmr_comm_state = result->mmr_comm_state; - if (err == 0) { - kresult.kmmr_msgtype = result->mmr_msgtype; - kresult.kmmr_flags = result->mmr_flags; - kresult.kmmr_exitval = result->mmr_exitval; - kresult.kmmr_failing_node = result->mmr_failing_node; - size = result->mmr_out_size; - if (size > 0) { - /* This is the max data we can transfer, here */ - if (size > MDMN_MAX_KRES_DATA) { - size = MDMN_MAX_KRES_DATA; - } - bcopy(result->mmr_out, &(kresult.kmmr_res_data), - size); - kresult.kmmr_res_size = size; - } else { - kresult.kmmr_res_size = 0; - } - } - free_result(result); - } - - (void) door_return((char *)&kresult, sizeof (md_mn_kresult_t), NULL, 0); -} - - -/* ARGSUSED */ -int -main(void) -{ - - int i; - int mdmn_door_handle; - pid_t pid; - int size; - md_error_t ep = mdnullerror; - struct rlimit rl; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - openlog("mddoors", LOG_PID, LOG_DAEMON); - - /* here beginneth the daemonizing code */ - pid = fork(); - if (pid < 0) { - syslog(LOG_DAEMON | LOG_ERR, gettext("Cannot fork")); - exit(1); - } - - if (pid) { - exit(0); - } - - /* - * Only one daemon can run at a time. - * If another instance is already running, this is not an error. - */ - if ((pid = enter_daemon_lock()) != getpid()) { - exit(0); - } - - rl.rlim_max = 0; - (void) getrlimit(RLIMIT_NOFILE, &rl); - if ((size = rl.rlim_max) == 0) { - syslog(LOG_DAEMON | LOG_ERR, gettext("Cannot getrlimit")); - exit(1); - } - - for (i = 0; i < size; i++) { - if (i == daemon_lock_fd) - continue; - (void) close(i); - } - - - i = open("/dev/null", 2); - (void) dup2(i, 1); - (void) dup2(i, 2); - (void) setsid(); - - /* here endeth the daemonizing code */ - - /* Block out the usual signals so we don't get killed unintentionally */ - (void) signal(SIGHUP, SIG_IGN); - (void) signal(SIGINT, SIG_IGN); - (void) signal(SIGQUIT, SIG_IGN); - (void) signal(SIGTERM, SIG_IGN); - - (void) atexit(daemon_cleanup); - - /* Resume any previously blocked resync */ - meta_mirror_resync_unblock_all(); - - /* - * At this point we are single threaded. - * We give mdmn_send_message() a chance to initialize safely. - */ - (void) mdmn_send_message(0, 0, 0, 0, 0, 0, 0, 0); - - /* setup the door handle */ - mdmn_door_handle = door_create(door2rpc, NULL, - DOOR_REFUSE_DESC | DOOR_NO_CANCEL); - if (mdmn_door_handle == -1) { - perror(gettext("door_create failed")); - syslog(LOG_DAEMON | LOG_ERR, gettext("door_create failed")); - exit(1); - } - - if (metaioctl(MD_MN_SET_DOORH, &mdmn_door_handle, &ep, - "mddoors") != 0) { - syslog(LOG_DAEMON | LOG_DEBUG, gettext( - "Couldn't set door handle")); - exit(1); - } - - (void) pause(); - syslog(LOG_DAEMON | LOG_ERR, gettext( - "Unexpected exit from pause()")); - return (1); -} diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_server.c b/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_server.c deleted file mode 100644 index 644c2ad2c7ab..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_server.c +++ /dev/null @@ -1,3230 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mdmn_subr.h" - -/* - * This is the communication daemon for SVM Multi Node Disksets. - * It runs on every node and provides the following rpc services: - * - mdmn_send_svc_2 - * - mdmn_work_svc_2 - * - mdmn_wakeup_initiator_svc_2 - * - mdmn_wakeup_master_svc_2 - * - mdmn_comm_lock_svc_2 - * - mdmn_comm_unlock_svc_2 - * - mdmn_comm_suspend_svc_2 - * - mdmn_comm_resume_svc_2 - * - mdmn_comm_reinit_set_svc_2 - * where send, lock, unlock and reinit are meant for external use, - * work and the two wakeups are for internal use only. - * - * NOTE: - * On every node only one of those xxx_2 functions can be active at the - * same time because the daemon is single threaded. - * - * (not quite true, as mdmn_send_svc_2 and mdmn_work_svc_2 do thr_create()s - * as part of their handlers, so those aspects are multi-threaded) - * - * In case an event occurs that has to be propagated to all the nodes... - * - * One node (the initiator) - * calls the libmeta function mdmn_send_message() - * This function calls the local daemon thru mdmn_send_svc_2. - * - * On the initiator: - * mdmn_send_svc_2() - * - starts a thread -> mdmn_send_to_work() and returns. - * mdmn_send_to_work() - * - sends this message over to the master of the diskset. - * This is done by calling mdmn_work_svc_2 on the master. - * - registers to the initiator_table - * - exits without doing a svc_sendreply() for the call to - * mdmn_send_svc_2. This means that call is blocked until somebody - * (see end of this comment) does a svc_sendreply(). - * This means mdmn_send_message() does not yet return. - * - A timeout surveillance is started at this point. - * This means in case the master doesn't reply at all in an - * aproppriate time, an error condition is returned - * to the caller. - * - * On the master: - * mdmn_work_svc_2() - * - starts a thread -> mdmn_master_process_msg() and returns - * mdmn_master_process_msg() - * - logs the message to the change log - * - executes the message locally - * - flags the message in the change log - * - sends the message to mdmn_work_svc_2() on all the - * other nodes (slaves) - * after each call to mdmn_work_svc_2 the thread goes to sleep and - * will be woken up by mdmn_wakeup_master_svc_2() as soon as the - * slave node is done with this message. - * - In case the slave doesn't respond in a apropriate time, an error - * is assumed to ensure the master doesn't wait forever. - * - * On a slave: - * mdmn_work_svc_2() - * - starts a thread -> mdmn_slave_process_msg() and returns - * mdmn_slave_process_msg() - * - processes this message locally by calling the appropriate message - * handler, that creates some result. - * - sends that result thru a call to mdmn_wakeup_master_svc_2() to - * the master. - * - * Back on the master: - * mdmn_wakeup_master_svc_2() - * - stores the result into the master_table. - * - signals the mdmn_master_process_msg-thread. - * - returns - * mdmn_master_process_msg() - * - after getting the results from all nodes - * - sends them back to the initiating node thru a call to - * mdmn_wakeup_initiator_svc_2. - * - * Back on the initiator: - * mdmn_wakeup_initiator_svc_2() - * - calls svc_sendreply() which makes the call to mdmn_send_svc_2() - * return. - * which allows the initial mdmn_send_message() call to return. - */ - -FILE *commdout; /* debug output for the commd */ -char *commdoutfile; /* file name for the above output */ -/* want at least 10 MB free space when logging into a file */ -#define MIN_FS_SPACE (10LL * 1024 * 1024) - -/* - * Number of outstanding messages that were initiated by this node. - * If zero, check_timeouts goes to sleep - */ -uint_t messages_on_their_way; -mutex_t check_timeout_mutex; /* need mutex to protect above */ -cond_t check_timeout_cv; /* trigger for check_timeouts */ - -/* for printing out time stamps */ -hrtime_t __savetime; - -/* RPC clients for every set and every node and their protecting locks */ -CLIENT *client[MD_MAXSETS][NNODES]; -rwlock_t client_rwlock[MD_MAXSETS]; - -/* the descriptors of all possible sets and their protectors */ -struct md_set_desc *set_descriptor[MD_MAXSETS]; -rwlock_t set_desc_rwlock[MD_MAXSETS]; - -/* the daemon to daemon communication has to timeout quickly */ -static struct timeval FOUR_SECS = { 4, 0 }; - -/* These indicate if a set has already been setup */ -int md_mn_set_inited[MD_MAXSETS]; - -/* For every set we have a message completion table and protecting mutexes */ -md_mn_mct_t *mct[MD_MAXSETS]; -mutex_t mct_mutex[MD_MAXSETS][MD_MN_NCLASSES]; - -/* Stuff to describe the global status of the commd on one node */ -#define MD_CGS_INITED 0x0001 -#define MD_CGS_ABORTED 0x0002 /* return everything with MDMNE_ABORT */ -uint_t md_commd_global_state = 0; /* No state when starting up */ - -/* - * Global verbosity level for the daemon - */ -uint_t md_commd_global_verb; - -/* - * libmeta doesn't like multiple threads in metaget_setdesc(). - * So we must protect access to it with a global lock - */ -mutex_t get_setdesc_mutex; - -/* - * Need a way to block single message types, - * hence an array with a status for every message type - */ -uint_t msgtype_lock_state[MD_MN_NMESSAGES]; - -/* for reading in the config file */ -#define MAX_LINE_SIZE 1024 - -extern char *commd_get_outfile(void); -extern uint_t commd_get_verbosity(void); - -/* - * mdmn_clnt_create is a helper function for meta_client_create_retry. It - * merely needs to call clnt_create_timed, and meta_client_create_retry - * will take care of the rest. - */ -/* ARGSUSED */ -static CLIENT * -mdmn_clnt_create(char *ignore, void *data, struct timeval *time_out) -{ - md_mnnode_desc *node = (md_mnnode_desc *)data; - - return (clnt_create_timed(node->nd_priv_ic, MDMN_COMMD, TWO, "tcp", - time_out)); -} - -#define FLUSH_DEBUGFILE() \ - if (commdout != (FILE *)NULL) { \ - (void) fflush(commdout); \ - (void) fsync(fileno(commdout)); \ - } - -static void -panic_system(int nid, md_mn_msgtype_t type, int master_err, int master_exitval, - md_mn_result_t *slave_result) -{ - md_mn_commd_err_t commd_err; - md_error_t mne = mdnullerror; - char *msg_buf; - - msg_buf = (char *)calloc(MAXPATHLEN + 1, sizeof (char)); - - FLUSH_DEBUGFILE(); - - if (master_err != MDMNE_ACK) { - (void) snprintf(msg_buf, MAXPATHLEN, "rpc.mdcommd: RPC " - "fail on master when processing message type %d\n", type); - } else if (slave_result == NULL) { - (void) snprintf(msg_buf, MAXPATHLEN, "rpc.mdcommd: RPC fail " - "on node %d when processing message type %d\n", nid, type); - } else { - (void) snprintf(msg_buf, MAXPATHLEN, "rpc.mdcommd: " - "Inconsistent return value from node %d when processing " - "message type %d. Master exitval = %d, " - "Slave exitval = %d\n", nid, type, master_exitval, - slave_result->mmr_exitval); - } - commd_err.size = strlen(msg_buf); - commd_err.md_message = (uint64_t)(uintptr_t)&msg_buf[0]; - - (void) metaioctl(MD_MN_COMMD_ERR, &commd_err, &mne, "rpc.mdcommd"); - (void) uadmin(A_DUMP, AD_BOOT, NULL); -} - -static void -flush_fcout() -{ - struct statvfs64 vfsbuf; - long long avail_bytes; - int warned = 0; - - for (; ; ) { - (void) sleep(10); - /* No output file, nothing to do */ - if (commdout == (FILE *)NULL) - continue; - - /* - * stat the appropriate filesystem to check for available space. - */ - if (statvfs64(commdoutfile, &vfsbuf)) { - continue; - } - - avail_bytes = vfsbuf.f_frsize * vfsbuf.f_bavail; - /* - * If we don't have enough space, we print out a warning. - * And we drop the verbosity level to NULL - * In case the condtion doesn't go away, we don't repeat - * the warning. - */ - if (avail_bytes < MIN_FS_SPACE) { - if (warned) { - continue; - } - commd_debug(MD_MMV_SYSLOG, - "NOT enough space available for logging\n"); - commd_debug(MD_MMV_SYSLOG, - "Have %lld bytes, need %lld bytes\n", - avail_bytes, MIN_FS_SPACE); - warned = 1; - md_commd_global_verb = MD_MMV_NULL; - } else { - warned = 0; - } - - (void) fflush(commdout); - } -} - -/* safer version of clnt_destroy. If clnt is NULL don't do anything */ -#define mdmn_clnt_destroy(clnt) { \ - if (clnt) \ - clnt_destroy(clnt); \ -} - -/* - * Own version of svc_sendreply that checks the integrity of the transport - * handle and so prevents us from core dumps in the real svc_sendreply() - */ -void -mdmn_svc_sendreply(SVCXPRT *transp, xdrproc_t xdr, caddr_t data) -{ - if (SVC_STAT(transp) == XPRT_DIED) { - commd_debug(MD_MMV_MISC, - "mdmn_svc_sendreply: XPRT_DIED\n"); - return; - } - (void) svc_sendreply(transp, xdr, data); -} - -/* - * timeout_initiator(set, class) - * - * Alas, I sent a message and didn't get a response back in aproppriate time. - * - * timeout_initiator() takes care for doing the needed svc_sendreply() to the - * calling mdmn_send_message, so that guy doesn't wait forever - * What is done here is pretty much the same as what is done in - * wakeup initiator. The difference is that we cannot provide for any results, - * of course and we set the comm_state to MDMNE_TIMEOUT. - * - * By doing so, mdmn_send_message can decide if a retry would make sense or not. - * It's not our's to decide that here. - */ -void -timeout_initiator(set_t setno, md_mn_msgclass_t class) -{ - SVCXPRT *transp; - md_mn_msgid_t mid; - md_mn_result_t *resultp; - - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_TIMEOUT; - - commd_debug(MD_MMV_MISC, - "timeout_initiator set = %d, class = %d\n", setno, class); - - transp = mdmn_get_initiator_table_transp(setno, class); - mdmn_get_initiator_table_id(setno, class, &mid); - - commd_debug(MD_MMV_MISC, "timeout_ini: (%d, 0x%llx-%d)\n", - MSGID_ELEMS(mid)); - /* - * Give the result the corresponding msgid from the failed message. - */ - MSGID_COPY(&mid, &(resultp->mmr_msgid)); - - /* return to mdmn_send_message() and let it deal with the situation */ - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - - free(resultp); - commd_debug(MD_MMV_MISC, "timeout_ini: sendreplied\n"); - svc_done(transp); - mdmn_unregister_initiator_table(setno, class); -} - - -/* - * check_timeouts - thread - * - * This implements a timeout surveillance for messages sent from the - * initiator to the master. - * - * If a message is started, this thread is triggered thru - * cond_signal(&check_timeout_cv) and we keep track of the numbers of - * messages that are outstanding (messages_on_their_way). - * - * As long as there are messages on their way, this thread never goes to sleep. - * It'll keep checking all class/set combinations for outstanding messages. - * If one is found, it's checked if this message is overdue. In that case, - * timeout_initiator() is called to wakeup the calling mdmn_send_message and - * to clean up the mess. - * - * If the result from the master arrives later, this message is considered - * to be unsolicited. And will be ignored. - */ - -void -check_timeouts() -{ - set_t setno; - time_t now, then; - mutex_t *mx; - md_mn_msgclass_t class; - - for (; ; ) { - now = time((time_t *)NULL); - for (setno = 1; setno < MD_MAXSETS; setno++) { - if (md_mn_set_inited[setno] != MDMN_SET_READY) { - continue; - } - for (class = MD_MSG_CLASS1; class < MD_MN_NCLASSES; - class++) { - mx = mdmn_get_initiator_table_mx(setno, class); - (void) mutex_lock(mx); - - /* then is the registered time */ - then = - mdmn_get_initiator_table_time(setno, class); - if ((then != 0) && (now > then)) { - timeout_initiator(setno, class); - } - (void) mutex_unlock(mx); - } - } - /* it's ok to check only once per second */ - (void) sleep(1); - - /* is there work to do? */ - (void) mutex_lock(&check_timeout_mutex); - if (messages_on_their_way == 0) { - (void) cond_wait(&check_timeout_cv, - &check_timeout_mutex); - } - (void) mutex_unlock(&check_timeout_mutex); - } -} - -void -setup_debug(void) -{ - char *tmp_dir; - - /* Read in the debug-controlling tokens from runtime.cf */ - md_commd_global_verb = commd_get_verbosity(); - /* - * If the user didn't specify a verbosity level in runtime.cf - * we can safely return here. As we don't intend to printout - * debug messages, we don't need to check for the output file. - */ - if (md_commd_global_verb == 0) { - return; - } - - /* if commdout is non-NULL it is an open FILE, we'd better close it */ - if (commdout != (FILE *)NULL) { - (void) fclose(commdout); - } - - commdoutfile = commd_get_outfile(); - - /* setup the debug output */ - if (commdoutfile == (char *)NULL) { - /* if no valid file was specified, use the default */ - commdoutfile = "/var/run/commd.out"; - commdout = fopen(commdoutfile, "a"); - } else { - /* check if the directory exists and is writable */ - tmp_dir = strdup(commdoutfile); - if ((access(dirname(tmp_dir), X_OK|W_OK)) || - ((commdout = fopen(commdoutfile, "a")) == (FILE *)NULL)) { - syslog(LOG_ERR, - "Can't write to specified output file %s,\n" - "using /var/run/commd.out instead\n", commdoutfile); - free(commdoutfile); - commdoutfile = "/var/run/commd.out"; - commdout = fopen(commdoutfile, "a"); - } - free(tmp_dir); - } - - if (commdout == (FILE *)NULL) { - syslog(LOG_ERR, "Can't write to debug output file %s\n", - commdoutfile); - } -} - -/* - * mdmn_is_node_dead checks to see if a node is dead using - * the SunCluster infrastructure which is a stable interface. - * If unable to contact SunCuster the node is assumed to be alive. - * Return values: - * 1 - node is dead - * 0 - node is alive - */ -int -mdmn_is_node_dead(md_mnnode_desc *node) -{ - char *fmt = "/usr/cluster/bin/scha_cluster_get -O NODESTATE_NODE "; - char *cmd; - size_t size; - char buf[10]; - FILE *ptr; - int retval = 0; - - /* I know that I'm alive */ - if (strcmp(node->nd_nodename, mynode()) == 0) - return (retval); - - size = strlen(fmt) + strlen(node->nd_nodename) + 1; - cmd = Zalloc(size); - (void) strlcat(cmd, fmt, size); - (void) strlcat(cmd, node->nd_nodename, size); - - if ((ptr = popen(cmd, "r")) != NULL) { - if (fgets(buf, sizeof (buf), ptr) != NULL) { - /* If scha_cluster_get returned DOWN - return dead */ - if (strncmp(buf, "DOWN", 4) == 0) - retval = 1; - } - (void) pclose(ptr); - } - Free(cmd); - return (retval); -} - -/* - * global_init() - * - * Perform some global initializations. - * - * the following routines have to call this before operation can start: - * - mdmn_send_svc_2 - * - mdmn_work_svc_2 - * - mdmn_comm_lock_svc_2 - * - mdmn_comm_unlock_svc_2 - * - mdmn_comm_suspend_svc_2 - * - mdmn_comm_resume_svc_2 - * - mdmn_comm_reinit_set_svc_2 - * - * This is a single threaded daemon, so it can only be in one of the above - * routines at the same time. - * This means, global_init() cannot be called more than once at the same time. - * Hence, no lock is needed. - */ -void -global_init(void) -{ - set_t set; - md_mn_msgclass_t class; - struct sigaction sighandler; - time_t clock_val; - struct rlimit commd_limit; - - - - /* Do these global initializations only once */ - if (md_commd_global_state & MD_CGS_INITED) { - return; - } - (void) sdssc_bind_library(); - - /* setup the debug options from the config file */ - setup_debug(); - - /* make sure that we don't run out of file descriptors */ - commd_limit.rlim_cur = commd_limit.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_NOFILE, &commd_limit) != 0) { - syslog(LOG_WARNING, gettext("setrlimit failed." - "Could not increase the max file descriptors")); - } - - /* Make setup_debug() be the action in case of SIGHUP */ - sighandler.sa_flags = 0; - (void) sigfillset(&sighandler.sa_mask); - sighandler.sa_handler = (void (*)(int)) setup_debug; - (void) sigaction(SIGHUP, &sighandler, NULL); - - __savetime = gethrtime(); - (void) time(&clock_val); - commd_debug(MD_MMV_MISC, "global init called %s\n", ctime(&clock_val)); - - /* start a thread that flushes out the debug on a regular basis */ - (void) thr_create(NULL, 0, (void *(*)(void *))flush_fcout, - (void *) NULL, THR_DETACHED, NULL); - - /* global rwlock's / mutex's / cond_t's go here */ - (void) mutex_init(&check_timeout_mutex, USYNC_THREAD, NULL); - (void) cond_init(&check_timeout_cv, USYNC_THREAD, NULL); - (void) mutex_init(&get_setdesc_mutex, USYNC_THREAD, NULL); - - /* Make sure the initiator table is initialized correctly */ - for (set = 0; set < MD_MAXSETS; set++) { - for (class = 0; class < MD_MN_NCLASSES; class++) { - mdmn_unregister_initiator_table(set, class); - } - } - - - /* setup the check for timeouts */ - (void) thr_create(NULL, 0, (void *(*)(void *))check_timeouts, - (void *) NULL, THR_DETACHED, NULL); - - md_commd_global_state |= MD_CGS_INITED; -} - - -/* - * mdmn_init_client(setno, nodeid) - * called if client[setno][nodeid] is NULL - * - * NOTE: Must be called with set_desc_rwlock held as a reader - * NOTE: Must be called with client_rwlock held as a writer - * - * If the rpc client for this node has not been setup for any set, we do it now. - * - * Returns 0 on success (node found in set, rpc client setup) - * -1 if metaget_setdesc failed, - * -2 if node not part of set - * -3 if clnt_create fails - */ -static int -mdmn_init_client(set_t setno, md_mn_nodeid_t nid) -{ - md_error_t ep = mdnullerror; - md_mnnode_desc *node; - md_set_desc *sd; /* just an abbr for set_descriptor[setno] */ - - sd = set_descriptor[setno]; - - /* - * Is the appropriate set_descriptor already initialized ? - * Can't think of a scenario where this is not the case, but we'd better - * check for it anyway. - */ - if (sd == NULL) { - mdsetname_t *sp; - - /* readlock -> writelock */ - (void) rw_unlock(&set_desc_rwlock[setno]); - (void) rw_wrlock(&set_desc_rwlock[setno]); - sp = metasetnosetname(setno, &ep); - /* Only one thread is supposed to be in metaget_setdesc() */ - (void) mutex_lock(&get_setdesc_mutex); - sd = metaget_setdesc(sp, &ep); - (void) mutex_unlock(&get_setdesc_mutex); - if (sd == NULL) { - /* back to ... */ - (void) rw_unlock(&set_desc_rwlock[setno]); - /* ... readlock */ - (void) rw_rdlock(&set_desc_rwlock[setno]); - return (-1); - } - set_descriptor[setno] = sd; - /* back to readlock */ - (void) rw_unlock(&set_desc_rwlock[setno]); - (void) rw_rdlock(&set_desc_rwlock[setno]); - } - - /* first we have to find the node name for this node id */ - for (node = sd->sd_nodelist; node; node = node->nd_next) { - if (node->nd_nodeid == nid) - break; /* we found our node in this set */ - } - - - if (node == (md_mnnode_desc *)NULL) { - commd_debug(MD_MMV_SYSLOG, - "FATAL: node %d not found in set %d\n", nid, setno); - (void) rw_unlock(&set_desc_rwlock[setno]); - return (-2); - } - - commd_debug(MD_MMV_INIT, "init: %s has the flags: 0x%x\n", - node->nd_nodename ? node->nd_nodename : "NULL", node->nd_flags); - - /* Did this node join the diskset? */ - if ((node->nd_flags & MD_MN_NODE_OWN) == 0) { - commd_debug(MD_MMV_INIT, "init: %s didn't join set %d\n", - node->nd_nodename ? node->nd_nodename : "NULL", setno); - (void) rw_unlock(&set_desc_rwlock[setno]); - return (-2); - } - - /* if clnt_create has not been done for that node, do it now */ - if (client[setno][nid] == (CLIENT *) NULL) { - time_t tout = 0; - - /* - * While trying to create a connection to a node, - * periodically check to see if the node has been marked - * dead by the SunCluster infrastructure. - * This periodic check is needed since a non-responsive - * rpc.mdcommd (while it is attempting to create a connection - * to a dead node) can lead to large delays and/or failures - * in the reconfig steps. - */ - while ((client[setno][nid] == (CLIENT *) NULL) && - (tout < MD_CLNT_CREATE_TOUT)) { - client[setno][nid] = meta_client_create_retry( - node->nd_nodename, mdmn_clnt_create, - (void *) node, MD_CLNT_CREATE_SUBTIMEOUT, &ep); - /* Is the node dead? */ - if (mdmn_is_node_dead(node) == 1) { - commd_debug(MD_MMV_SYSLOG, - "rpc.mdcommd: no client for dead node %s\n", - node->nd_nodename); - break; - } else - tout += MD_CLNT_CREATE_SUBTIMEOUT; - } - - if (client[setno][nid] == (CLIENT *) NULL) { - clnt_pcreateerror(node->nd_nodename); - (void) rw_unlock(&set_desc_rwlock[setno]); - return (-3); - } - /* this node has the license to send */ - commd_debug(MD_MMV_MISC, "init_client: calling add_lic\n"); - add_license(node); - - /* set the timeout value */ - clnt_control(client[setno][nid], CLSET_TIMEOUT, - (char *)&FOUR_SECS); - - } - (void) rw_unlock(&set_desc_rwlock[setno]); - return (0); -} - -/* - * check_client(setno, nodeid) - * - * must be called with reader lock held for set_desc_rwlock[setno] - * and must be called with reader lock held for client_rwlock[setno] - * Checks if the client for this set/node combination is already setup - * if not it upgrades the lock to a writer lock - * and tries to initialize the client. - * Finally it's checked if the client nulled out again due to some race - * - * returns 0 if there is a usable client - * returns MDMNE_RPC_FAIL otherwise - */ -static int -check_client(set_t setno, md_mn_nodeid_t nodeid) -{ - int ret = 0; - - while ((client[setno][nodeid] == (CLIENT *)NULL) && (ret == 0)) { - /* upgrade reader ... */ - (void) rw_unlock(&client_rwlock[setno]); - /* ... to writer lock. */ - (void) rw_wrlock(&client_rwlock[setno]); - if (mdmn_init_client(setno, nodeid) != 0) { - ret = MDMNE_RPC_FAIL; - } - /* downgrade writer ... */ - (void) rw_unlock(&client_rwlock[setno]); - /* ... back to reader lock. */ - (void) rw_rdlock(&client_rwlock[setno]); - } - return (ret); -} - -/* - * mdmn_init_set(setno, todo) - * setno is the number of the set to be initialized. - * todo is one of the MDMN_SET_* thingies or MDMN_SET_READY - * If called with MDMN_SET_READY everything is initialized. - * - * If the set mutexes are already initialized, the caller has to hold - * both set_desc_rwlock[setno] and client_rwlock[setno] as a writer, before - * calling mdmn_init_set() - */ -int -mdmn_init_set(set_t setno, int todo) -{ - int class; - md_mnnode_desc *node; - md_set_desc *sd; /* just an abbr for set_descriptor[setno] */ - mdsetname_t *sp; - md_error_t ep = mdnullerror; - md_mn_nodeid_t nid; - - /* - * Check if we are told to setup the mutexes and - * if these are not yet setup - */ - if ((todo & MDMN_SET_MUTEXES) && - ((md_mn_set_inited[setno] & MDMN_SET_MUTEXES) == 0)) { - (void) mutex_init(&mdmn_busy_mutex[setno], USYNC_THREAD, NULL); - (void) cond_init(&mdmn_busy_cv[setno], USYNC_THREAD, NULL); - (void) rwlock_init(&client_rwlock[setno], USYNC_THREAD, NULL); - (void) rwlock_init(&set_desc_rwlock[setno], USYNC_THREAD, NULL); - - for (class = MD_MSG_CLASS1; class < MD_MN_NCLASSES; class++) { - (void) mutex_init(mdmn_get_master_table_mx(setno, - class), USYNC_THREAD, NULL); - (void) cond_init(mdmn_get_master_table_cv(setno, class), - USYNC_THREAD, NULL); - (void) mutex_init(mdmn_get_initiator_table_mx(setno, - class), USYNC_THREAD, NULL); - } - md_mn_set_inited[setno] |= MDMN_SET_MUTEXES; - } - if ((todo & MDMN_SET_MCT) && - ((md_mn_set_inited[setno] & MDMN_SET_MCT) == 0)) { - int fd; - size_t filesize; - caddr_t addr; - char table_name[32]; - struct flock fl; - - filesize = (sizeof (md_mn_mct_t)); - (void) snprintf(table_name, sizeof (table_name), "%s%d", - MD_MN_MSG_COMP_TABLE, setno); - /* - * If the mct file exists we map it into memory. - * Otherwise we create an empty file of appropriate - * size and map that into memory. - * The mapped areas are stored in mct[setno]. - */ - fd = open(table_name, O_RDWR|O_CREAT|O_DSYNC, 0600); - if (fd < 0) { - commd_debug(MD_MMV_MISC, - "init_set: Can't open MCT\n"); - return (-1); - } - /* - * Ensure that we are the only process that has this file - * mapped. If another instance of rpc.mdcommd has beaten us - * then we display the failing process and attempt to terminate - * it. The next call of this routine should establish us as - * the only rpc.mdcommd on the system. - */ - (void) memset(&fl, 0, sizeof (fl)); - fl.l_type = F_WRLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = filesize + 1; - - if (fcntl(fd, F_SETLK, &fl) == -1) { - commd_debug(MD_MMV_SYSLOG, - "init_set: Cannot lock MCT '%s'\n", table_name); - if (fcntl(fd, F_GETLK, &fl) != -1) { - commd_debug(MD_MMV_SYSLOG, "rpc.mdcommd:" - "Process %d holds lock\n", fl.l_pid); - (void) close(fd); - } else { - commd_debug(MD_MMV_SYSLOG, "rpc.mdcommd:" - "F_GETLK failed\n"); - (void) close(fd); - return (-1); - } - - /* - * Try to terminate other mdcommd process so that we - * can establish ourselves. - */ - if (sigsend(P_PID, fl.l_pid, 0) == 0) { - if (sigsend(P_PID, fl.l_pid, SIGKILL) < 0) { - commd_debug(MD_MMV_SYSLOG, - "rpc.mdcommd:" - "SIGKILL of %d failed\n", fl.l_pid); - } else { - commd_debug(MD_MMV_SYSLOG, - "rpc.mdcommd:" - "Process %d killed\n", fl.l_pid); - } - } else { - commd_debug(MD_MMV_SYSLOG, "rpc.mdcommd:" - "Process %d not killable\n", fl.l_pid); - } - return (-1); - } - /* - * To ensure that the file has the appropriate size, - * we write a byte at the end of the file. - */ - (void) lseek(fd, filesize + 1, SEEK_SET); - (void) write(fd, "\0", 1); - - /* at this point we have a file in place that we can mmap */ - addr = mmap(0, filesize, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, (off_t)0); - if (addr == MAP_FAILED) { - commd_debug(MD_MMV_INIT, - "init_set: mmap mct error %d\n", - errno); - return (-1); - } - /* LINTED pointer alignment */ - mct[setno] = (md_mn_mct_t *)addr; - - /* finally we initialize the mutexes that protect the mct */ - for (class = MD_MSG_CLASS1; class < MD_MN_NCLASSES; class++) { - (void) mutex_init(&(mct_mutex[setno][class]), - USYNC_THREAD, NULL); - } - - md_mn_set_inited[setno] |= MDMN_SET_MCT; - } - /* - * Check if we are told to setup the nodes and - * if these are not yet setup - * (Attention: negative logic here compared to above!) - */ - if (((todo & MDMN_SET_NODES) == 0) || - (md_mn_set_inited[setno] & MDMN_SET_NODES)) { - return (0); /* success */ - } - - if ((sp = metasetnosetname(setno, &ep)) == NULL) { - commd_debug(MD_MMV_SYSLOG, - "metasetnosetname(%d) returned NULL\n", setno); - return (MDMNE_NOT_JOINED); - } - - /* flush local copy of rpc.metad data */ - metaflushsetname(sp); - - (void) mutex_lock(&get_setdesc_mutex); - sd = metaget_setdesc(sp, &ep); - (void) mutex_unlock(&get_setdesc_mutex); - - if (sd == NULL) { - commd_debug(MD_MMV_SYSLOG, - "metaget_setdesc(%d) returned NULL\n", setno); - return (MDMNE_NOT_JOINED); - } - - /* - * if this set is not a multinode set or - * this node didn't join yet the diskset, better don't do anything - */ - if ((MD_MNSET_DESC(sd) == 0) || - (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN) == 0) { - commd_debug(MD_MMV_INIT, "didn't yet join set %d\n", setno); - return (MDMNE_NOT_JOINED); - } - - for (node = sd->sd_nodelist; node != NULL; node = node->nd_next) { - time_t tout = 0; - nid = node->nd_nodeid; - - commd_debug(MD_MMV_INIT, - "setting up: node=%s, priv_ic=%s, flags=0x%x\n", - node->nd_nodename ? node->nd_nodename : "NULL", - node->nd_priv_ic ? node->nd_priv_ic : "NULL", - node->nd_flags); - - if ((node->nd_flags & MD_MN_NODE_OWN) == 0) { - commd_debug(MD_MMV_INIT, - "init: %s didn't join set %d\n", - node->nd_nodename ? node->nd_nodename : "NULL", - setno); - continue; - } - - if (client[setno][nid] != (CLIENT *) NULL) { - /* already inited */ - commd_debug(MD_MMV_INIT, "init: already: node=%s\n", - node->nd_nodename ? node->nd_nodename : "NULL"); - continue; - } - - /* - * While trying to create a connection to a node, - * periodically check to see if the node has been marked - * dead by the SunCluster infrastructure. - * This periodic check is needed since a non-responsive - * rpc.mdcommd (while it is attempting to create a connection - * to a dead node) can lead to large delays and/or failures - * in the reconfig steps. - */ - while ((client[setno][nid] == (CLIENT *) NULL) && - (tout < MD_CLNT_CREATE_TOUT)) { - client[setno][nid] = meta_client_create_retry( - node->nd_nodename, mdmn_clnt_create, - (void *) node, MD_CLNT_CREATE_SUBTIMEOUT, &ep); - /* Is the node dead? */ - if (mdmn_is_node_dead(node) == 1) { - commd_debug(MD_MMV_SYSLOG, - "rpc.mdcommd: no client for dead node %s\n", - node->nd_nodename); - break; - } else - tout += MD_CLNT_CREATE_SUBTIMEOUT; - } - - if (client[setno][nid] == (CLIENT *) NULL) { - clnt_pcreateerror(node->nd_nodename); - /* - * If we cannot connect to a single node - * (maybe because it is down) we mark this node as not - * owned and continue with the next node in the list. - * This is better than failing the entire starting up - * of the commd system. - */ - node->nd_flags &= ~MD_MN_NODE_OWN; - commd_debug(MD_MMV_SYSLOG, - "WARNING couldn't create client for %s\n" - "Reconfig cycle required\n", - node->nd_nodename); - commd_debug(MD_MMV_INIT, - "WARNING couldn't create client for %s\n" - "Reconfig cycle required\n", - node->nd_nodename); - continue; - } - /* this node has the license to send */ - commd_debug(MD_MMV_MISC, "init_set: calling add_lic\n"); - add_license(node); - - /* set the timeout value */ - clnt_control(client[setno][nid], CLSET_TIMEOUT, - (char *)&FOUR_SECS); - - commd_debug(MD_MMV_INIT, "init: done: node=%s\n", - node->nd_nodename ? node->nd_nodename : "NULL"); - } - - set_descriptor[setno] = sd; - md_mn_set_inited[setno] |= MDMN_SET_NODES; - return (0); /* success */ -} - -void * -mdmn_send_to_work(void *arg) -{ - int *rpc_err = NULL; - int success; - int try_master; - set_t setno; - mutex_t *mx; /* protection for initiator_table */ - SVCXPRT *transp; - md_mn_msg_t *msg; - md_mn_nodeid_t set_master; - md_mn_msgclass_t class; - md_mn_msg_and_transp_t *matp = (md_mn_msg_and_transp_t *)arg; - - msg = matp->mat_msg; - transp = matp->mat_transp; - - class = mdmn_get_message_class(msg->msg_type); - setno = msg->msg_setno; - - /* set the sender, so the master knows who to send the results */ - (void) rw_rdlock(&set_desc_rwlock[setno]); - msg->msg_sender = set_descriptor[setno]->sd_mn_mynode->nd_nodeid; - set_master = set_descriptor[setno]->sd_mn_master_nodeid; - - mx = mdmn_get_initiator_table_mx(setno, class); - (void) mutex_lock(mx); - - /* - * Here we check, if the initiator table slot for this set/class - * combination is free to use. - * If this is not the case, we return CLASS_BUSY forcing the - * initiating send_message call to retry - */ - success = mdmn_check_initiator_table(setno, class); - if (success == MDMNE_CLASS_BUSY) { - md_mn_msgid_t active_mid; - - mdmn_get_initiator_table_id(setno, class, &active_mid); - - commd_debug(MD_MMV_SEND, - "send_to_work: received but locally busy " - "(%d, 0x%llx-%d), set=%d, class=%d, type=%d, " - "active msg=(%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, - msg->msg_type, MSGID_ELEMS(active_mid)); - } else { - commd_debug(MD_MMV_SEND, - "send_to_work: received (%d, 0x%llx-%d), " - "set=%d, class=%d, type=%d\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, msg->msg_type); - } - - try_master = 2; /* return failure after two retries */ - while ((success == MDMNE_ACK) && (try_master--)) { - (void) rw_rdlock(&client_rwlock[setno]); - /* is the rpc client to the master still around ? */ - if (check_client(setno, set_master)) { - success = MDMNE_RPC_FAIL; - FLUSH_DEBUGFILE(); - (void) rw_unlock(&client_rwlock[setno]); - break; /* out of try_master-loop */ - } - - /* - * Send the request to the work function on the master - * this call will return immediately - */ - rpc_err = mdmn_work_2(msg, client[setno][set_master], - set_master); - - /* Everything's Ok? */ - if (rpc_err == NULL) { - success = MDMNE_RPC_FAIL; - /* - * Probably something happened to the daemon on the - * master. Kill the client, and try again... - */ - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - mdmn_clnt_destroy(client[setno][set_master]); - if (client[setno][set_master] != (CLIENT *)NULL) { - client[setno][set_master] = (CLIENT *)NULL; - } - (void) rw_unlock(&client_rwlock[setno]); - continue; - - } else if (*rpc_err != MDMNE_ACK) { - /* something went wrong, break out */ - success = *rpc_err; - free(rpc_err); - (void) rw_unlock(&client_rwlock[setno]); - break; /* out of try_master-loop */ - } - - (void) rw_unlock(&client_rwlock[setno]); - free(rpc_err); - - /* - * If we are here, we sucessfully delivered the message. - * We register the initiator_table, so that - * wakeup_initiator_2 can do the sendreply with the - * results for us. - */ - success = MDMNE_ACK; - mdmn_register_initiator_table(setno, class, msg, transp); - - /* tell check_timeouts, there's work to do */ - (void) mutex_lock(&check_timeout_mutex); - messages_on_their_way++; - (void) cond_signal(&check_timeout_cv); - (void) mutex_unlock(&check_timeout_mutex); - break; /* out of try_master-loop */ - } - - (void) rw_unlock(&set_desc_rwlock[setno]); - - if (success == MDMNE_ACK) { - commd_debug(MD_MMV_SEND, - "send_to_work: registered (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - } else { - /* In case of failure do the sendreply now */ - md_mn_result_t *resultp; - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = success; - /* - * copy the MSGID so that we know _which_ message - * failed (if the transp has got mangled) - */ - MSGID_COPY(&(msg->msg_msgid), &(resultp->mmr_msgid)); - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - commd_debug(MD_MMV_SEND, - "send_to_work: not registered (%d, 0x%llx-%d) cs=%d\n", - MSGID_ELEMS(msg->msg_msgid), success); - free_result(resultp); - /* - * We don't have a timeout registered to wake us up, so we're - * now done with this handle. Release it back to the pool. - */ - svc_done(transp); - - } - - free_msg(msg); - /* the alloc was done in mdmn_send_svc_2 */ - Free(matp); - (void) mutex_unlock(mx); - return (NULL); - -} - -/* - * do_message_locally(msg, result) - * Process a message locally on the master - * Lookup the MCT if the message has already been processed. - * If not, call the handler and store the result - * If yes, retrieve the result from the MCT. - * Return: - * MDMNE_ACK in case of success - * MDMNE_LOG_FAIL if the MCT could not be checked - */ -static int -do_message_locally(md_mn_msg_t *msg, md_mn_result_t *result) -{ - int completed; - set_t setno; - md_mn_msgtype_t msgtype = msg->msg_type; - md_mn_msgclass_t class; - - void (*handler)(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res); - - handler = mdmn_get_handler(msgtype); - if (handler == NULL) { - result->mmr_exitval = 0; - /* let the sender decide if this is an error or not */ - result->mmr_comm_state = MDMNE_NO_HANDLER; - return (MDMNE_NO_HANDLER); - } - - class = mdmn_get_message_class(msg->msg_type); - setno = msg->msg_setno; - - result->mmr_msgtype = msgtype; - result->mmr_flags = msg->msg_flags; - MSGID_COPY(&(msg->msg_msgid), &(result->mmr_msgid)); - - (void) mutex_lock(&mct_mutex[setno][class]); - completed = mdmn_check_completion(msg, result); - if (completed == MDMN_MCT_NOT_DONE) { - /* message not yet processed locally */ - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "calling handler for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - - /* - * Mark the message as being currently processed, - * so we won't start a second handler for it - */ - (void) mdmn_mark_completion(msg, NULL, MDMN_MCT_IN_PROGRESS); - (void) mutex_unlock(&mct_mutex[setno][class]); - - /* here we actually process the message on the master */ - (*handler)(msg, MD_MSGF_ON_MASTER, result); - - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "finished handler for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - - /* Mark the message as fully processed, store the result */ - (void) mutex_lock(&mct_mutex[setno][class]); - (void) mdmn_mark_completion(msg, result, MDMN_MCT_DONE); - } else if (completed == MDMN_MCT_DONE) { - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "result for (%d, 0x%llx-%d) from MCT\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - } else if (completed == MDMN_MCT_IN_PROGRESS) { - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "(%d, 0x%llx-%d) is currently being processed\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - } else { - /* MCT error occurred (should never happen) */ - (void) mutex_unlock(&mct_mutex[setno][class]); - result->mmr_comm_state = MDMNE_LOG_FAIL; - commd_debug(MD_MMV_SYSLOG, "WARNING " - "mdmn_check_completion returned %d " - "for (%d,0x%llx-%d)\n", completed, - MSGID_ELEMS(msg->msg_msgid)); - return (MDMNE_LOG_FAIL); - } - (void) mutex_unlock(&mct_mutex[setno][class]); - return (MDMNE_ACK); - -} - -/* - * do_send_message(msg, node) - * - * Send a message to a given node and wait for a acknowledgment, that the - * message has arrived on the remote node. - * Make sure that the client for the set is setup correctly. - * If no ACK arrives, destroy and recreate the RPC client and retry the - * message one time - * After actually sending wait no longer than the appropriate number of - * before timing out the message. - * - * Note must be called with set_desc_wrlock held in reader mode - */ -static int -do_send_message(md_mn_msg_t *msg, md_mnnode_desc *node) -{ - int err; - int rpc_retries; - int timeout_retries = 0; - int *ret = NULL; - set_t setno; - cond_t *cv; /* see mdmn_wakeup_master_svc_2 */ - mutex_t *mx; /* protection for class_busy */ - timestruc_t timeout; /* surveillance for remote daemon */ - md_mn_nodeid_t nid; - md_mn_msgtype_t msgtype; - md_mn_msgclass_t class; - - nid = node->nd_nodeid; - msgtype = msg->msg_type; - setno = msg->msg_setno; - class = mdmn_get_message_class(msgtype); - mx = mdmn_get_master_table_mx(setno, class); - cv = mdmn_get_master_table_cv(setno, class); - -retry_rpc: - - /* We try two times to send the message */ - rpc_retries = 2; - - /* - * if sending the message doesn't succeed the first time due to a - * RPC problem, we retry one time - */ - while ((rpc_retries != 0) && (ret == NULL)) { - /* in abort state, we error out immediately */ - if (md_commd_global_state & MD_CGS_ABORTED) { - return (MDMNE_ABORT); - } - - (void) rw_rdlock(&client_rwlock[setno]); - /* unable to create client? Ignore it */ - if (check_client(setno, nid)) { - /* - * In case we cannot establish an RPC client, we - * take this node out of our considerations. - * This will be reset by a reconfig - * cycle that should come pretty soon. - * MNISSUE: Should a reconfig cycle - * be forced on SunCluster? - */ - node->nd_flags &= ~MD_MN_NODE_OWN; - commd_debug(MD_MMV_SYSLOG, - "WARNING couldn't create client for %s\n" - "Reconfig cycle required\n", - node->nd_nodename); - commd_debug(MD_MMV_PROC_M, "proc_mas: (%d,0x%llx-%d) " - "WARNING couldn't create client for %s\n", - MSGID_ELEMS(msg->msg_msgid), node->nd_nodename); - (void) rw_unlock(&client_rwlock[setno]); - return (MDMNE_IGNORE_NODE); - } - /* let's be paranoid and check again before sending */ - if (client[setno][nid] == NULL) { - /* - * if this is true, strange enough, we catch our breath, - * and then continue, so that the client is set up - * once again. - */ - commd_debug(MD_MMV_PROC_M, "client is NULL\n"); - (void) rw_unlock(&client_rwlock[setno]); - (void) sleep(1); - continue; - } - - /* send it over, it will return immediately */ - ret = mdmn_work_2(msg, client[setno][nid], nid); - - (void) rw_unlock(&client_rwlock[setno]); - - if (ret != NULL) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: sending (%d,0x%llx-%d) to %d returned " - " 0x%x\n", - MSGID_ELEMS(msg->msg_msgid), nid, *ret); - } else { - commd_debug(MD_MMV_PROC_M, - "proc_mas: sending (%d,0x%llx-%d) to %d returned " - " NULL \n", - MSGID_ELEMS(msg->msg_msgid), nid); - } - - if ((ret == NULL) || (*ret == MDMNE_CANNOT_CONNECT) || - (*ret == MDMNE_THR_CREATE_FAIL)) { - /* - * Something happened to the daemon on the other side. - * Kill the client, and try again. - * check_client() will create a new client - */ - (void) rw_wrlock(&client_rwlock[setno]); - mdmn_clnt_destroy(client[setno][nid]); - if (client[setno][nid] != (CLIENT *)NULL) { - client[setno][nid] = (CLIENT *)NULL; - } - (void) rw_unlock(&client_rwlock[setno]); - - /* ... but don't try infinitely */ - --rpc_retries; - continue; - } - /* - * If the class is locked on the other node, keep trying. - * This situation will go away automatically, - * if we wait long enough - */ - if (*ret == MDMNE_CLASS_LOCKED) { - (void) sleep(1); - free(ret); - ret = NULL; - continue; - } - } - if (ret == NULL) { - return (MDMNE_RPC_FAIL); - } - - - /* if the slave is in abort state, we just ignore it. */ - if (*ret == MDMNE_ABORT) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: work(%d,0x%llx-%d) returned " - "MDMNE_ABORT\n", - MSGID_ELEMS(msg->msg_msgid)); - free(ret); - return (MDMNE_IGNORE_NODE); - } - - /* Did the remote processing succeed? */ - if (*ret != MDMNE_ACK) { - /* - * Some commd failure in the middle of sending the msg - * to the nodes. We don't continue here. - */ - commd_debug(MD_MMV_PROC_M, - "proc_mas: work(%d,0x%llx-%d) returns %d\n", - MSGID_ELEMS(msg->msg_msgid), *ret); - free(ret); - return (MDMNE_RPC_FAIL); - } - free(ret); - ret = NULL; - - /* - * When we are here, we have sent the message to the other node and - * we know that node has accepted it. - * We go to sleep and have trust to be woken up by wakeup. - * If we wakeup due to a timeout, or a signal, no result has been - * placed in the appropriate slot. - * If we timeout, it is likely that this is because the node has - * gone away, so we will destroy the client and try it again in the - * expectation that the rpc will fail and we will return - * MDMNE_IGNORE_NODE. If that is not the case, the message must still - * be being processed on the slave. In this case just timeout for 4 - * more seconds and then return RPC_FAIL if the message is not complete. - */ - timeout.tv_nsec = 0; - timeout.tv_sec = (timeout_retries == 0) ? mdmn_get_timeout(msgtype) : - FOUR_SECS.tv_sec; - err = cond_reltimedwait(cv, mx, &timeout); - - if (err == 0) { - /* everything's fine, return success */ - return (MDMNE_ACK); - } - - if (err == ETIME) { - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "timeout occured, set=%d, class=%d, " - "msgid=(%d, 0x%llx-%d), timeout_retries=%d\n", - setno, class, MSGID_ELEMS(msg->msg_msgid), timeout_retries); - if (timeout_retries == 0) { - timeout_retries++; - /* - * Destroy the client and try the rpc call again - */ - (void) rw_wrlock(&client_rwlock[setno]); - mdmn_clnt_destroy(client[setno][nid]); - client[setno][nid] = (CLIENT *)NULL; - (void) rw_unlock(&client_rwlock[setno]); - goto retry_rpc; - } - } else if (err == EINTR) { - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "commd signalled, set=%d, class=%d, " - "msgid=(%d, 0x%llx-%d)\n", - setno, class, MSGID_ELEMS(msg->msg_msgid)); - } else { - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "cond_reltimedwait err=%d, set=%d, " - "class=%d, msgid=(%d, 0x%llx-%d)\n", - err, setno, class, - MSGID_ELEMS(msg->msg_msgid)); - } - - /* some failure happened */ - return (MDMNE_RPC_FAIL); -} - -/* - * before we return we have to - * free_msg(msg); because we are working on a copied message - */ -void -mdmn_master_process_msg(md_mn_msg_t *msg) -{ - int *ret; - int err; - int nmsgs; /* total number of msgs */ - int curmsg; /* index of current msg */ - set_t setno; - uint_t inherit_flags = 0; - uint_t secdiff, usecdiff; /* runtime of this message */ - md_error_t mde = mdnullerror; - md_mn_msg_t *msglist[MAX_SUBMESSAGES]; /* all msgs to process */ - md_mn_msg_t *cmsg; /* current msg */ - md_mn_msgid_t dummyid; - md_mn_result_t *result; - md_mn_result_t *slave_result; - md_mn_nodeid_t sender; - md_mn_nodeid_t set_master; - md_mnnode_desc *node; - md_mn_msgtype_t orig_type; /* type of the original message */ - md_mn_msgtype_t msgtype; /* type of the current message */ - md_mn_msgclass_t orig_class; /* class of the original message */ - md_mn_msgclass_t class; /* class of the current message */ - - int (*smgen)(md_mn_msg_t *msg, md_mn_msg_t **msglist); - - orig_type = msgtype = msg->msg_type; - sender = msg->msg_sender; - setno = msg->msg_setno; - - result = Zalloc(sizeof (md_mn_result_t)); - result->mmr_setno = setno; - result->mmr_msgtype = msgtype; - MSGID_COPY(&(msg->msg_msgid), &(result->mmr_msgid)); - - orig_class = mdmn_get_message_class(msgtype); - - commd_debug(MD_MMV_PROC_M, - "proc_mas: received (%d, 0x%llx-%d) set=%d, class=%d, type=%d\n", - MSGID_ELEMS(msg->msg_msgid), setno, orig_class, msgtype); - - (void) rw_rdlock(&set_desc_rwlock[setno]); - set_master = set_descriptor[setno]->sd_mn_master_nodeid; - result->mmr_sender = set_master; - /* - * Put message into the change log unless told otherwise - * Note that we only log original messages. - * If they are generated by some smgen, we don't log them! - * Replay messages aren't logged either. - * Note, that replay messages are unlogged on completion. - */ - if ((msg->msg_flags & (MD_MSGF_NO_LOG | MD_MSGF_REPLAY_MSG)) == 0) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: calling log_msg for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - err = mdmn_log_msg(msg); - if (err == MDMNE_NULL) { - /* msg logged successfully */ - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "done log_msg for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - goto proceed; - } - if (err == MDMNE_ACK) { - /* Same msg in the slot, proceed */ - commd_debug(MD_MMV_PROC_M, "proc_mas: " - "already logged (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - goto proceed; - } - if (err == MDMNE_LOG_FAIL) { - /* Oh, bad, the log is non functional. */ - result->mmr_comm_state = MDMNE_LOG_FAIL; - /* - * Note that the mark_busy was already done by - * mdmn_work_svc_2() - */ - (void) mutex_lock(&mdmn_busy_mutex[setno]); - mdmn_mark_class_unbusy(setno, orig_class); - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - } - if (err == MDMNE_CLASS_BUSY) { - /* - * The log is occupied with a different message - * that needs to be played first. - * We reject the current message with MDMNE_CLASS_BUSY - * to the initiator and do not unbusy the set/class, - * because we will proceed with the logged message, - * which has the same set/class combination - */ - result->mmr_comm_state = MDMNE_CLASS_BUSY; - } - ret = (int *)NULL; - (void) rw_rdlock(&client_rwlock[setno]); - - if (check_client(setno, sender)) { - commd_debug(MD_MMV_SYSLOG, - "proc_mas: No client for initiator \n"); - } else { - ret = mdmn_wakeup_initiator_2(result, - client[setno][sender], sender); - } - (void) rw_unlock(&client_rwlock[setno]); - - if (ret == (int *)NULL) { - commd_debug(MD_MMV_SYSLOG, - "proc_mas: couldn't wakeup_initiator \n"); - } else { - if (*ret != MDMNE_ACK) { - commd_debug(MD_MMV_SYSLOG, "proc_mas: " - "wakeup_initiator returned %d\n", *ret); - } - free(ret); - } - free_msg(msg); - - if (err == MDMNE_LOG_FAIL) { - /* we can't proceed here */ - free_result(result); - (void) rw_unlock(&set_desc_rwlock[setno]); - return; - } else if (err == MDMNE_CLASS_BUSY) { - mdmn_changelog_record_t *lr; - lr = mdmn_get_changelogrec(setno, orig_class); - assert(lr != NULL); - - /* proceed with the logged message */ - msg = copy_msg(&(lr->lr_msg), NULL); - - /* - * The logged message has to have the same class but - * type and sender can be different - */ - orig_type = msgtype = msg->msg_type; - sender = msg->msg_sender; - - commd_debug(MD_MMV_PROC_M, - "proc_mas: Got new message from change log: " - "(%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - - /* continue normal operation with this message */ - } - } - -proceed: - smgen = mdmn_get_submessage_generator(msgtype); - if (smgen == NULL) { - /* no submessages to create, just use the original message */ - msglist[0] = msg; - nmsgs = 1; - } else { - /* some bits are passed on to submessages */ - inherit_flags = msg->msg_flags & MD_MSGF_INHERIT_BITS; - - nmsgs = smgen(msg, msglist); - - /* some settings for the submessages */ - for (curmsg = 0; curmsg < nmsgs; curmsg++) { - cmsg = msglist[curmsg]; - - /* Apply the inherited flags */ - cmsg->msg_flags |= inherit_flags; - - /* - * Make sure the submessage ID is set correctly - * Note: first submessage has mid_smid of 1 (not 0) - */ - cmsg->msg_msgid.mid_smid = curmsg + 1; - - /* need the original class set in msgID (for MCT) */ - cmsg->msg_msgid.mid_oclass = orig_class; - } - - commd_debug(MD_MMV_PROC_M, - "smgen generated %d submsgs, origclass = %d\n", - nmsgs, orig_class); - } - /* - * This big loop does the following. - * For all messages: - * process message on the master first (a message completion - * table MCT ensures a message is not processed twice) - * in case of an error break out of message loop - * for all nodes -- unless MD_MSGF_NO_BCAST is set -- - * send message to node until that succeeds - * merge result -- not yet implemented - * respect MD_MSGF_STOP_ON_ERROR - */ - for (curmsg = 0; curmsg < nmsgs; curmsg++) { - int break_msg_loop = 0; - mutex_t *mx; /* protection for class_busy */ - int master_err; - int master_exitval = -1; - - cmsg = msglist[curmsg]; - msgtype = cmsg->msg_type; - class = mdmn_get_message_class(msgtype); - node = NULL; - mx = mdmn_get_master_table_mx(setno, class); - - /* If we are in the abort state, we error out immediately */ - if (md_commd_global_state & MD_CGS_ABORTED) { - break; /* out of the message loop */ - } - - commd_debug(MD_MMV_PROC_M, "class=%d, orig_class=%d\n", - class, orig_class); - /* - * If the current class is different from the original class, - * we have to lock it down. - * The original class is already marked busy. - * At this point we cannot refuse the message because the - * class is busy right now, so we wait until the class becomes - * available again. As soon as something changes for this set - * we will be cond_signal'ed (in mdmn_mark_class_unbusy) - * - * Granularity could be finer (setno/class) - */ - if (class != orig_class) { - (void) mutex_lock(&mdmn_busy_mutex[setno]); - while (mdmn_mark_class_busy(setno, class) == FALSE) { - (void) cond_wait(&mdmn_busy_cv[setno], - &mdmn_busy_mutex[setno]); - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - } - - master_err = do_message_locally(cmsg, result); - - if ((master_err != MDMNE_ACK) || - ((master_err == MDMNE_ACK) && (result->mmr_exitval != 0))) { - result->mmr_failing_node = set_master; - if (cmsg->msg_flags & MD_MSGF_STOP_ON_ERROR) { - /* - * if appropriate, unbusy the class and - * break out of the message loop - */ - if (class != orig_class) { - (void) mutex_lock( - &mdmn_busy_mutex[setno]); - mdmn_mark_class_unbusy(setno, class); - (void) mutex_unlock( - &mdmn_busy_mutex[setno]); - } - break; - } - } - - if (master_err == MDMNE_ACK) - master_exitval = result->mmr_exitval; - - /* No broadcast? => next message */ - if (cmsg->msg_flags & MD_MSGF_NO_BCAST) { - /* if appropriate, unbusy the class */ - if (class != orig_class) { - (void) mutex_lock(&mdmn_busy_mutex[setno]); - mdmn_mark_class_unbusy(setno, class); - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - } - continue; - } - - - /* fake sender, so we get notified when the results are avail */ - cmsg->msg_sender = set_master; - /* - * register to the master_table. It's needed by wakeup_master to - * wakeup the sleeping thread. - * Access is protected by the class lock: mdmn_mark_class_busy() - */ - mdmn_set_master_table_id(setno, class, &(cmsg->msg_msgid)); - - - - (void) rw_rdlock(&set_desc_rwlock[setno]); - /* Send the message to all other nodes */ - for (node = set_descriptor[setno]->sd_nodelist; node; - node = node->nd_next) { - md_mn_nodeid_t nid = node->nd_nodeid; - - /* We are master and have already processed the msg */ - if (node == set_descriptor[setno]->sd_mn_masternode) { - continue; - } - - /* If this node didn't join the disk set, ignore it */ - if ((node->nd_flags & MD_MN_NODE_OWN) == 0) { - continue; - } - - /* If a DIRECTED message, skip non-recipient nodes */ - if ((cmsg->msg_flags & MD_MSGF_DIRECTED) && - nid != cmsg->msg_recipient) { - continue; - } - - (void) mutex_lock(mx); - /* - * Register the node that is addressed, - * so we can detect unsolicited messages - */ - mdmn_set_master_table_addr(setno, class, nid); - slave_result = (md_mn_result_t *)NULL; - - /* - * Now send it. do_send_message() will return if - * a failure occurs or - * the results are available - */ - err = do_send_message(cmsg, node); - - /* in abort state, we error out immediately */ - if (md_commd_global_state & MD_CGS_ABORTED) { - break; - } - - if (err == MDMNE_ACK) { - slave_result = - mdmn_get_master_table_res(setno, class); - commd_debug(MD_MMV_PROC_M, - "proc_mas: got result for (%d,0x%llx-%d)\n", - MSGID_ELEMS(cmsg->msg_msgid)); - } else if (err == MDMNE_IGNORE_NODE) { - (void) mutex_unlock(mx); - continue; /* send to next node */ - } - (void) mutex_unlock(mx); - - - /* - * If the result is NULL, or err doesn't show success, - * something went wrong with this RPC call. - */ - if ((slave_result == NULL) || (err != MDMNE_ACK)) { - /* - * If PANIC_WHEN_INCONSISTENT set, - * panic if the master succeeded while - * this node failed - */ - if ((cmsg->msg_flags & - MD_MSGF_PANIC_WHEN_INCONSISTENT) && - (master_err == MDMNE_ACK)) - panic_system(nid, cmsg->msg_type, - master_err, master_exitval, - slave_result); - - result->mmr_failing_node = nid; - /* are we supposed to stop in case of error? */ - if (cmsg->msg_flags & MD_MSGF_STOP_ON_ERROR) { - result->mmr_exitval = MDMNE_RPC_FAIL; - commd_debug(MD_MMV_SYSLOG, "proc_mas: " - "result (%d,0x%llx-%d) is NULL\n", - MSGID_ELEMS(cmsg->msg_msgid)); - FLUSH_DEBUGFILE(); - break_msg_loop = 1; - break; /* out of node loop first */ - } else { - /* send msg to the next node */ - continue; - } - - } - - /* - * Message processed on remote node. - * If PANIC_WHEN_INCONSISTENT set, panic if the - * result is different on this node from the result - * on the master - */ - if ((cmsg->msg_flags & - MD_MSGF_PANIC_WHEN_INCONSISTENT) && - ((master_err != MDMNE_ACK) || - (slave_result->mmr_exitval != master_exitval))) - panic_system(nid, cmsg->msg_type, master_err, - master_exitval, slave_result); - - /* - * At this point we know we have a message that was - * processed on the remote node. - * We now check if the exitval is non zero. - * In that case we discard the previous result and - * rather use the current. - * This means: If a message fails on no node, - * the result from the master will be returned. - * There's currently no such thing as merge of results - * If additionally STOP_ON_ERROR is set, we bail out - */ - if (slave_result->mmr_exitval != 0) { - /* throw away the previously allocated result */ - free_result(result); - - /* copy_result() allocates new memory */ - result = copy_result(slave_result); - free_result(slave_result); - - dump_result(MD_MMV_PROC_M, "proc_mas", result); - - result->mmr_failing_node = nid; - if (cmsg->msg_flags & MD_MSGF_STOP_ON_ERROR) { - break_msg_loop = 1; - break; /* out of node loop */ - } - continue; /* try next node */ - - } else { - /* - * MNIssue: may want to merge the results - * from all slaves. Currently only report - * the results from the master. - */ - free_result(slave_result); - } - - } /* End of loop over the nodes */ - (void) rw_unlock(&set_desc_rwlock[setno]); - - - /* release the current class again */ - if (class != orig_class) { - (void) mutex_lock(&mdmn_busy_mutex[setno]); - mdmn_mark_class_unbusy(setno, class); - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - } - - /* are we supposed to quit entirely ? */ - if (break_msg_loop || - (md_commd_global_state & MD_CGS_ABORTED)) { - break; /* out of msg loop */ - } - - } /* End of loop over the messages */ - /* - * If we are here, there's two possibilities: - * - we processed all messages on all nodes without an error. - * In this case we return the result from the master. - * (to be implemented: return the merged result) - * - we encountered an error in which case result has been - * set accordingly already. - */ - - if (md_commd_global_state & MD_CGS_ABORTED) { - result->mmr_comm_state = MDMNE_ABORT; - } - - /* - * This message has been processed completely. - * Remove it from the changelog. - * Do this for replay messages too. - * Note that the message is unlogged before waking up the - * initiator. This is done for two reasons. - * 1. Remove a race condition that occurs when back to back - * messages are sent for the same class, the registeration is - * is lost. - * 2. If the initiator died but the action was completed on all the - * the nodes, we want that to be marked "done" quickly. - */ - - if ((msg->msg_flags & MD_MSGF_NO_LOG) == 0) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: calling unlog_msg for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - (void) mdmn_unlog_msg(msg); - commd_debug(MD_MMV_PROC_M, - "proc_mas: done unlog_msg for (%d,0x%llx-%d) type %d\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - } - - /* - * In case of submessages, we increased the submessage ID in the - * result structure. We restore the message ID to the value that - * the initiator is waiting for. - */ - result->mmr_msgid.mid_smid = 0; - result->mmr_msgtype = orig_type; - result->mmr_sender = set_master; - - /* if we have an inited client, send result */ - ret = (int *)NULL; - - (void) rw_rdlock(&client_rwlock[setno]); - if (check_client(setno, sender)) { - commd_debug(MD_MMV_SYSLOG, - "proc_mas: unable to create client for initiator\n"); - } else { - ret = mdmn_wakeup_initiator_2(result, client[setno][sender], - sender); - } - (void) rw_unlock(&client_rwlock[setno]); - - if (ret == (int *)NULL) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: couldn't wakeup initiator\n"); - } else { - if (*ret != MDMNE_ACK) { - commd_debug(MD_MMV_PROC_M, - "proc_mas: wakeup_initiator returned %d\n", - *ret); - } - free(ret); - } - - (void) rw_unlock(&set_desc_rwlock[setno]); - /* Free all submessages, if there were any */ - if (nmsgs > 1) { - for (curmsg = 0; curmsg < nmsgs; curmsg++) { - free_msg(msglist[curmsg]); - } - } - /* Free the result */ - free_result(result); - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - mdmn_mark_class_unbusy(setno, orig_class); - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - - /* - * We use this ioctl just to get the time in the same format as used in - * the messageID. If it fails, all we get is a bad runtime output. - */ - (void) metaioctl(MD_IOCGUNIQMSGID, &dummyid, &mde, NULL); - secdiff = (dummyid.mid_time - msg->msg_msgid.mid_time) >> 32; - usecdiff = (dummyid.mid_time - msg->msg_msgid.mid_time) & 0xfffff; - - /* catching possible overflow */ - if (usecdiff >= 1000000) { - usecdiff -= 1000000; - secdiff++; - } - - - commd_debug(MD_MMV_PROC_M, "proc_mas: done (%d, 0x%llx-%d) type=%02d " - "%5d.%06d secs runtime\n", - MSGID_ELEMS(msg->msg_msgid), orig_type, secdiff, usecdiff); - - /* Free the original message */ - free_msg(msg); -} - -void -mdmn_slave_process_msg(md_mn_msg_t *msg) -{ - int *ret = NULL; - int completed; - int retries; - int successfully_returned; - set_t setno; - md_mn_result_t *result; - md_mn_nodeid_t sender; - md_mn_nodeid_t whoami; - md_mn_msgtype_t msgtype; - md_mn_msgclass_t class; - - void (*handler)(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res); - - setno = msg->msg_setno; - sender = msg->msg_sender; /* this is always the master of the set */ - msgtype = msg->msg_type; - - (void) rw_rdlock(&set_desc_rwlock[setno]); - whoami = set_descriptor[setno]->sd_mn_mynode->nd_nodeid; - (void) rw_unlock(&set_desc_rwlock[setno]); - - result = Zalloc(sizeof (md_mn_result_t)); - result->mmr_flags = msg->msg_flags; - result->mmr_setno = setno; - result->mmr_msgtype = msgtype; - result->mmr_sender = whoami; - result->mmr_comm_state = MDMNE_ACK; /* Ok state */ - MSGID_COPY(&(msg->msg_msgid), &(result->mmr_msgid)); - class = mdmn_get_message_class(msgtype); - - commd_debug(MD_MMV_PROC_S, - "proc_sla: received (%d, 0x%llx-%d) set=%d, class=%d, type=%d\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, msgtype); - - handler = mdmn_get_handler(msgtype); - - if (handler == NULL) { - result->mmr_exitval = 0; - /* let the sender decide if this is an error or not */ - result->mmr_comm_state = MDMNE_NO_HANDLER; - commd_debug(MD_MMV_PROC_S, - "proc_sla: No handler for (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - } else { - - /* Did we already process this message ? */ - (void) mutex_lock(&mct_mutex[setno][class]); - completed = mdmn_check_completion(msg, result); - - if (completed == MDMN_MCT_NOT_DONE) { - /* message not yet processed locally */ - commd_debug(MD_MMV_PROC_S, - "proc_sla: calling handler for (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - - /* - * Mark the message as being currently processed, - * so we won't start a second handler for it - */ - (void) mdmn_mark_completion(msg, NULL, - MDMN_MCT_IN_PROGRESS); - - (void) mutex_unlock(&mct_mutex[setno][class]); - (*handler)(msg, MD_MSGF_ON_SLAVE, result); - - commd_debug(MD_MMV_PROC_S, - "proc_sla: finished handler for (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - - (void) mutex_lock(&mct_mutex[setno][class]); - /* Mark the message as fully done, store the result */ - (void) mdmn_mark_completion(msg, result, MDMN_MCT_DONE); - - } else if (completed == MDMN_MCT_DONE) { - /* message processed previously, got result from MCT */ - commd_debug(MD_MMV_PROC_S, - "proc_sla: result for (%d, 0x%llx-%d) from MCT\n", - MSGID_ELEMS(msg->msg_msgid)); - } else if (completed == MDMN_MCT_IN_PROGRESS) { - /* - * If the message is curruntly being processed, - * we can return here, without sending a result back. - * This will be done by the initial message handling - * thread - */ - (void) mutex_unlock(&mct_mutex[setno][class]); - commd_debug(MD_MMV_PROC_M, "proc_sla: " - "(%d, 0x%llx-%d) is currently being processed\n", - MSGID_ELEMS(msg->msg_msgid), msgtype); - - free_msg(msg); - free_result(result); - return; - } else { - /* MCT error occurred (should never happen) */ - result->mmr_comm_state = MDMNE_LOG_FAIL; - commd_debug(MD_MMV_PROC_S, - "proc_sla: MCT error for (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - } - (void) mutex_unlock(&mct_mutex[setno][class]); - } - - /* - * At this point we have a result (even in an error case) - * that we return to the master. - */ - (void) rw_rdlock(&set_desc_rwlock[setno]); - retries = 2; /* we will try two times to send the results */ - successfully_returned = 0; - - while (!successfully_returned && (retries != 0)) { - ret = (int *)NULL; - (void) rw_rdlock(&client_rwlock[setno]); - if (check_client(setno, sender)) { - /* - * If we cannot setup the rpc connection to the master, - * we can't do anything besides logging this fact. - */ - commd_debug(MD_MMV_SYSLOG, - "proc_mas: unable to create client for master\n"); - (void) rw_unlock(&client_rwlock[setno]); - break; - } else { - ret = mdmn_wakeup_master_2(result, - client[setno][sender], sender); - /* - * if mdmn_wakeup_master_2 returns NULL, it can be that - * the master (or the commd on the master) had died. - * In that case, we destroy the client to the master - * and retry. - * If mdmn_wakeup_master_2 doesn't return MDMNE_ACK, - * the commd on the master is alive but - * something else is wrong, - * in that case a retry doesn't make sense => break out - */ - if (ret == (int *)NULL) { - commd_debug(MD_MMV_PROC_S, - "proc_sla: wakeup_master returned NULL\n"); - /* release reader lock, grab writer lock */ - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - mdmn_clnt_destroy(client[setno][sender]); - if (client[setno][sender] != (CLIENT *)NULL) { - client[setno][sender] = (CLIENT *)NULL; - } - (void) rw_unlock(&client_rwlock[setno]); - retries--; - commd_debug(MD_MMV_PROC_S, - "retries = %d\n", retries); - continue; - } - if (*ret != MDMNE_ACK) { - commd_debug(MD_MMV_PROC_S, "proc_sla: " - "wakeup_master returned %d\n", *ret); - (void) rw_unlock(&client_rwlock[setno]); - break; - } else { /* Good case */ - successfully_returned = 1; - (void) rw_unlock(&client_rwlock[setno]); - } - } - } - - (void) rw_unlock(&set_desc_rwlock[setno]); - commd_debug(MD_MMV_PROC_S, "proc_sla: done (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - - if (ret != (int *)NULL) - free(ret); - free_msg(msg); - free_result(result); -} - - -/* - * mdmn_send_svc_2: - * --------------- - * Check that the issuing node is a legitimate one (i.e. is licensed to send - * messages to us), that the RPC request can be staged. - * - * Returns: - * 0 => no RPC request is in-flight, no deferred svc_sendreply() - * 1 => queued RPC request in-flight. Completion will be made (later) - * by a wakeup_initiator_2() [hopefully] - */ -int -mdmn_send_svc_2(md_mn_msg_t *omsg, struct svc_req *rqstp) -{ - int err; - set_t setno; - SVCXPRT *transp = rqstp->rq_xprt; - md_mn_msg_t *msg; - md_mn_result_t *resultp; - md_mn_msgclass_t class; - md_mn_msg_and_transp_t *matp; - - msg = copy_msg(omsg, NULL); - xdr_free(xdr_md_mn_msg_t, (caddr_t)omsg); - - setno = msg->msg_setno; - class = mdmn_get_message_class(msg->msg_type); - - /* If we are in the abort state, we error out immediately */ - if (md_commd_global_state & MD_CGS_ABORTED) { - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_ABORT; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - free_result(resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - return (0); - } - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - commd_debug(MD_MMV_SEND, - "send: received (%d, 0x%llx-%d), set=%d, class=%d, type=%d\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, msg->msg_type); - - /* Check for verbosity related message */ - if (msg->msg_type == MD_MN_MSG_VERBOSITY) { - md_mn_verbose_t *d; - - d = (md_mn_verbose_t *)((void *)(msg->msg_event_data)); - md_commd_global_verb = d->mmv_what; - /* everytime the bitmask is set, we reset the timer */ - __savetime = gethrtime(); - /* - * If local-only-flag is set, we are done here, - * otherwise we pass that message on to the master. - */ - if (msg->msg_flags & MD_MSGF_LOCAL_ONLY) { - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_ACK; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, - (char *)resultp); - free_result(resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - return (0); - } - } - - /* - * Are we entering the abort state? - * Here we don't even need to check for MD_MSGF_LOCAL_ONLY, because - * this message cannot be distributed anyway. - * So, it's safe to return immediately. - */ - if (msg->msg_type == MD_MN_MSG_ABORT) { - md_commd_global_state |= MD_CGS_ABORTED; - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_ACK; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - free_result(resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - return (0); - } - - - /* - * Is this message type blocked? - * If so we return MDMNE_CLASS_LOCKED, immediately - */ - if (msgtype_lock_state[msg->msg_type] == MMTL_LOCK) { - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_CLASS_LOCKED; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - free_result(resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - commd_debug(MD_MMV_SEND, - "send: type locked (%d, 0x%llx-%d), set=%d, class=%d, " - "type=%d\n", MSGID_ELEMS(msg->msg_msgid), setno, class, - msg->msg_type); - return (0); - } - - - if (md_mn_set_inited[setno] != MDMN_SET_READY) { - /* Can only use the appropriate mutexes if they are inited */ - if (md_mn_set_inited[setno] & MDMN_SET_MUTEXES) { - (void) rw_wrlock(&set_desc_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - err = mdmn_init_set(setno, MDMN_SET_READY); - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_unlock(&set_desc_rwlock[setno]); - } else { - err = mdmn_init_set(setno, MDMN_SET_READY); - } - - if (err) { - /* couldn't initialize connections, cannot proceed */ - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = err; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, - (char *)resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - free_result(resultp); - commd_debug(MD_MMV_SEND, - "send: init err = %d\n", err); - return (0); - } - } - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - if ((mdmn_is_class_suspended(setno, class) == TRUE) && - ((msg->msg_flags & MD_MSGF_OVERRIDE_SUSPEND) == 0)) { - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - resultp = Zalloc(sizeof (md_mn_result_t)); - resultp->mmr_comm_state = MDMNE_SUSPENDED; - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)resultp); - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - free_result(resultp); - commd_debug(MD_MMV_SEND, - "send: class suspended (%d, 0x%llx-%d), set=%d, " - "class=%d, type=%d\n", MSGID_ELEMS(msg->msg_msgid), - setno, class, msg->msg_type); - return (0); - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - /* is this rpc request coming from the local node? */ - if (check_license(rqstp, 0) == FALSE) { - svc_freeargs(transp, xdr_md_mn_msg_t, (caddr_t)msg); - commd_debug(MD_MMV_SEND, - "send: check licence fail(%d, 0x%llx-%d), set=%d, " - "class=%d, type=%d\n", MSGID_ELEMS(msg->msg_msgid), - setno, class, msg->msg_type); - return (0); - } - - - /* - * We allocate a structure that can take two pointers in order to pass - * both the message and the transp into thread_create. - * The free for this alloc is done in mdmn_send_to_work() - */ - matp = Malloc(sizeof (md_mn_msg_and_transp_t)); - matp->mat_msg = msg; - matp->mat_transp = transp; - - /* - * create a thread here that calls work on the master. - * If we are already on the master, this would block if running - * in the same context. (our service is single threaded)( - * Make it a detached thread because it will not communicate with - * anybody thru thr_* mechanisms - */ - (void) thr_create(NULL, 0, mdmn_send_to_work, (void *) matp, - THR_DETACHED, NULL); - - commd_debug(MD_MMV_SEND, "send: done (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - /* - * We return here without sending results. This will be done by - * mdmn_wakeup_initiator_svc_2() as soon as the results are available. - * Until then the calling send_message will be blocked, while we - * are able to take calls. - */ - - return (1); -} - -/* ARGSUSED */ -int * -mdmn_work_svc_2(md_mn_msg_t *omsg, struct svc_req *rqstp) -{ - int err; - set_t setno; - thread_t tid; - int *retval; - md_mn_msg_t *msg; - md_mn_msgclass_t class; - - retval = Malloc(sizeof (int)); - - /* If we are in the abort state, we error out immediately */ - if (md_commd_global_state & MD_CGS_ABORTED) { - xdr_free(xdr_md_mn_msg_t, (caddr_t)omsg); - *retval = MDMNE_ABORT; - return (retval); - } - - msg = copy_msg(omsg, NULL); - xdr_free(xdr_md_mn_msg_t, (caddr_t)omsg); - - /* - * Is this message type blocked? - * If so we return MDMNE_CLASS_LOCKED, immediately. - * This check is performed on master and slave. - */ - if (msgtype_lock_state[msg->msg_type] == MMTL_LOCK) { - *retval = MDMNE_CLASS_LOCKED; - return (retval); - } - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - class = mdmn_get_message_class(msg->msg_type); - setno = msg->msg_setno; - - if (md_mn_set_inited[setno] != MDMN_SET_READY) { - /* Can only use the appropriate mutexes if they are inited */ - if (md_mn_set_inited[setno] & MDMN_SET_MUTEXES) { - (void) rw_wrlock(&set_desc_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - err = mdmn_init_set(setno, MDMN_SET_READY); - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_unlock(&set_desc_rwlock[setno]); - } else { - err = mdmn_init_set(setno, MDMN_SET_READY); - } - - if (err) { - *retval = MDMNE_CANNOT_CONNECT; - free_msg(msg); - return (retval); - } - } - - /* is this rpc request coming from a licensed node? */ - if (check_license(rqstp, msg->msg_sender) == FALSE) { - free_msg(msg); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - commd_debug(MD_MMV_WORK, - "work: received (%d, 0x%llx-%d), set=%d, class=%d, type=%d, " - "flags=0x%x\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, msg->msg_type, - msg->msg_flags); - - /* Check for various CLASS0 message types */ - if (msg->msg_type == MD_MN_MSG_VERBOSITY) { - md_mn_verbose_t *d; - - d = (md_mn_verbose_t *)((void *)(msg->msg_event_data)); - /* for now we ignore set / class in md_mn_verbose_t */ - md_commd_global_verb = d->mmv_what; - /* everytime the bitmask is set, we reset the timer */ - __savetime = gethrtime(); - } - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - - /* check if class is locked via a call to mdmn_comm_lock_svc_2 */ - if (mdmn_is_class_locked(setno, class) == TRUE) { - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - *retval = MDMNE_CLASS_LOCKED; - free_msg(msg); - return (retval); - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - /* Check if the class is busy right now. Do it only on the master */ - (void) rw_rdlock(&set_desc_rwlock[setno]); - if (set_descriptor[setno]->sd_mn_am_i_master) { - (void) rw_unlock(&set_desc_rwlock[setno]); - /* - * If the class is currently suspended, don't accept new - * messages, unless they are flagged with an override bit. - */ - (void) mutex_lock(&mdmn_busy_mutex[setno]); - if ((mdmn_is_class_suspended(setno, class) == TRUE) && - ((msg->msg_flags & MD_MSGF_OVERRIDE_SUSPEND) == 0)) { - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - *retval = MDMNE_SUSPENDED; - commd_debug(MD_MMV_SEND, - "send: set %d is suspended\n", setno); - free_msg(msg); - return (retval); - } - if (mdmn_mark_class_busy(setno, class) == FALSE) { - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - *retval = MDMNE_CLASS_BUSY; - free_msg(msg); - return (retval); - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - /* - * Because the real processing of the message takes time we - * create a thread for it. So the master thread can continue - * to run and accept further messages. - */ - *retval = thr_create(NULL, 0, - (void *(*)(void *))mdmn_master_process_msg, (void *)msg, - THR_DETACHED|THR_SUSPENDED, &tid); - } else { - (void) rw_unlock(&set_desc_rwlock[setno]); - *retval = thr_create(NULL, 0, - (void *(*)(void *)) mdmn_slave_process_msg, (void *)msg, - THR_DETACHED|THR_SUSPENDED, &tid); - } - - if (*retval != 0) { - *retval = MDMNE_THR_CREATE_FAIL; - free_msg(msg); - return (retval); - } - - /* Now run the new thread */ - (void) thr_continue(tid); - - commd_debug(MD_MMV_WORK, - "work: done (%d, 0x%llx-%d), set=%d, class=%d, type=%d\n", - MSGID_ELEMS(msg->msg_msgid), setno, class, msg->msg_type); - - *retval = MDMNE_ACK; /* this means success */ - return (retval); -} - -/* ARGSUSED */ -int * -mdmn_wakeup_initiator_svc_2(md_mn_result_t *res, struct svc_req *rqstp) -{ - - int *retval; - int err; - set_t setno; - mutex_t *mx; /* protection of initiator_table */ - SVCXPRT *transp = NULL; - md_mn_msgid_t initiator_table_id; - md_mn_msgclass_t class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - setno = res->mmr_setno; - - if (md_mn_set_inited[setno] != MDMN_SET_READY) { - /* set not ready means we just crashed are restarted now */ - /* Can only use the appropriate mutexes if they are inited */ - if (md_mn_set_inited[setno] & MDMN_SET_MUTEXES) { - (void) rw_wrlock(&set_desc_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - err = mdmn_init_set(setno, MDMN_SET_READY); - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_unlock(&set_desc_rwlock[setno]); - } else { - err = mdmn_init_set(setno, MDMN_SET_READY); - } - - if (err) { - *retval = MDMNE_CANNOT_CONNECT; - xdr_free(xdr_md_mn_result_t, (caddr_t)res); - return (retval); - } - } - - /* is this rpc request coming from a licensed node? */ - if (check_license(rqstp, res->mmr_sender) == FALSE) { - xdr_free(xdr_md_mn_result_t, (caddr_t)res); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - - class = mdmn_get_message_class(res->mmr_msgtype); - mx = mdmn_get_initiator_table_mx(setno, class); - - commd_debug(MD_MMV_WAKE_I, - "wake_ini: received (%d, 0x%llx-%d) set=%d, class=%d, type=%d\n", - MSGID_ELEMS(res->mmr_msgid), setno, class, res->mmr_msgtype); - - (void) mutex_lock(mx); - - /* - * Search the initiator wakeup table. - * If we find an entry here (which should always be true) - * we are on the initiating node and we wakeup the original - * local rpc call. - */ - mdmn_get_initiator_table_id(setno, class, &initiator_table_id); - - if (MSGID_CMP(&(initiator_table_id), &(res->mmr_msgid))) { - transp = mdmn_get_initiator_table_transp(setno, class); - mdmn_svc_sendreply(transp, xdr_md_mn_result_t, (char *)res); - svc_done(transp); - mdmn_unregister_initiator_table(setno, class); - *retval = MDMNE_ACK; - - commd_debug(MD_MMV_WAKE_I, - "wake_ini: replied (%d, 0x%llx-%d)\n", - MSGID_ELEMS(res->mmr_msgid)); - } else { - commd_debug(MD_MMV_WAKE_I, - "wakeup initiator: unsolicited message (%d, 0x%llx-%d)\n", - MSGID_ELEMS(res->mmr_msgid)); - *retval = MDMNE_NO_WAKEUP_ENTRY; - } - (void) mutex_unlock(mx); - /* less work for check_timeouts */ - (void) mutex_lock(&check_timeout_mutex); - if (messages_on_their_way == 0) { - commd_debug(MD_MMV_WAKE_I, - "Oops, messages_on_their_way < 0 (%d, 0x%llx-%d)\n", - MSGID_ELEMS(res->mmr_msgid)); - } else { - messages_on_their_way--; - } - (void) mutex_unlock(&check_timeout_mutex); - xdr_free(xdr_md_mn_result_t, (caddr_t)res); - - return (retval); -} - - -/* - * res must be free'd by the thread we wake up - */ -/* ARGSUSED */ -int * -mdmn_wakeup_master_svc_2(md_mn_result_t *ores, struct svc_req *rqstp) -{ - - int *retval; - int err; - set_t setno; - cond_t *cv; - mutex_t *mx; - md_mn_msgid_t master_table_id; - md_mn_nodeid_t sender; - md_mn_result_t *res; - md_mn_msgclass_t class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* Need to copy the results here, as they are static for RPC */ - res = copy_result(ores); - xdr_free(xdr_md_mn_result_t, (caddr_t)ores); - - class = mdmn_get_message_class(res->mmr_msgtype); - setno = res->mmr_setno; - - if (md_mn_set_inited[setno] != MDMN_SET_READY) { - /* set not ready means we just crashed are restarted now */ - /* Can only use the appropriate mutexes if they are inited */ - if (md_mn_set_inited[setno] & MDMN_SET_MUTEXES) { - (void) rw_wrlock(&set_desc_rwlock[setno]); - (void) rw_wrlock(&client_rwlock[setno]); - err = mdmn_init_set(setno, MDMN_SET_READY); - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_unlock(&set_desc_rwlock[setno]); - } else { - err = mdmn_init_set(setno, MDMN_SET_READY); - } - - if (err) { - *retval = MDMNE_CANNOT_CONNECT; - xdr_free(xdr_md_mn_result_t, (caddr_t)res); - return (retval); - } - } - - /* is this rpc request coming from a licensed node? */ - if (check_license(rqstp, res->mmr_sender) == FALSE) { - *retval = MDMNE_RPC_FAIL; - xdr_free(xdr_md_mn_result_t, (caddr_t)res); - return (retval); - } - - - commd_debug(MD_MMV_WAKE_M, - "wake_mas: received (%d, 0x%llx-%d) set=%d, class=%d, type=%d " - "from %d\n", - MSGID_ELEMS(res->mmr_msgid), setno, class, res->mmr_msgtype, - res->mmr_sender); - /* - * The mutex and cv are needed for waking up the thread - * sleeping in mdmn_master_process_msg() - */ - mx = mdmn_get_master_table_mx(setno, class); - cv = mdmn_get_master_table_cv(setno, class); - - /* - * lookup the master wakeup table - * If we find our message, we are on the master and - * called by a slave that finished processing a message. - * We store the results in the appropriate slot and - * wakeup the thread (mdmn_master_process_msg()) waiting for them. - */ - (void) mutex_lock(mx); - mdmn_get_master_table_id(setno, class, &master_table_id); - sender = mdmn_get_master_table_addr(setno, class); - - if (MSGID_CMP(&(master_table_id), &(res->mmr_msgid))) { - if (sender == res->mmr_sender) { - mdmn_set_master_table_res(setno, class, res); - (void) cond_signal(cv); - *retval = MDMNE_ACK; - } else { - /* id is correct but wrong sender (I smell a timeout) */ - commd_debug(MD_MMV_WAKE_M, - "wakeup master got unsolicited message: " - "(%d, 0x%llx-%d) from %d\n", - MSGID_ELEMS(res->mmr_msgid), res->mmr_sender); - free_result(res); - *retval = MDMNE_TIMEOUT; - } - } else { - /* id is wrong, smells like a very late timeout */ - commd_debug(MD_MMV_WAKE_M, - "wakeup master got unsolicited message: " - "(%d, 0x%llx-%d) from %d, expected (%d, 0x%llx-%d)\n", - MSGID_ELEMS(res->mmr_msgid), res->mmr_sender, - MSGID_ELEMS(master_table_id)); - free_result(res); - *retval = MDMNE_NO_WAKEUP_ENTRY; - } - - (void) mutex_unlock(mx); - - return (retval); -} - -/* - * Lock a set/class combination. - * This is mainly done for debug purpose. - * This set/class combination immediately is blocked, - * even in the middle of sending messages to multiple slaves. - * This remains until the user issues a mdmn_comm_unlock_svc_2 for the same - * set/class combination. - * - * Special messages of class MD_MSG_CLASS0 can never be locked. - * e.g. MD_MN_MSG_VERBOSITY, MD_MN_MSG_ABORT - * - * That means, if MD_MSG_CLASS0 is specified, we lock all classes from - * >= MD_MSG_CLASS1 to < MD_MN_NCLASSES - * - * set must be between 1 and MD_MAXSETS - * class can be: - * MD_MSG_CLASS0 which means all other classes in this case - * or one specific class (< MD_MN_NCLASSES) - * - * Returns: - * MDMNE_ACK on sucess (locking a locked class is Ok) - * MDMNE_EINVAL if a parameter is out of range - */ - -/* ARGSUSED */ -int * -mdmn_comm_lock_svc_2(md_mn_set_and_class_t *msc, struct svc_req *rqstp) -{ - int *retval; - set_t setno = msc->msc_set; - md_mn_msgclass_t class = msc->msc_class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_md_mn_set_and_class_t, (caddr_t)msc); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - /* Perform some range checking */ - if ((setno == 0) || (setno >= MD_MAXSETS) || - (class < MD_MSG_CLASS0) || (class >= MD_MN_NCLASSES)) { - *retval = MDMNE_EINVAL; - return (retval); - } - - commd_debug(MD_MMV_MISC, "lock: set=%d, class=%d\n", setno, class); - (void) mutex_lock(&mdmn_busy_mutex[setno]); - if (class != MD_MSG_CLASS0) { - mdmn_mark_class_locked(setno, class); - } else { - /* MD_MSG_CLASS0 is used as a wild card for all classes */ - for (class = MD_MSG_CLASS1; class < MD_MN_NCLASSES; class++) { - mdmn_mark_class_locked(setno, class); - } - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - *retval = MDMNE_ACK; - return (retval); -} - -/* - * Unlock a set/class combination. - * set must be between 1 and MD_MAXSETS - * class can be: - * MD_MSG_CLASS0 which means all other classes in this case (like above) - * or one specific class (< MD_MN_NCLASSES) - * - * Returns: - * MDMNE_ACK on sucess (unlocking an unlocked class is Ok) - * MDMNE_EINVAL if a parameter is out of range - */ -/* ARGSUSED */ -int * -mdmn_comm_unlock_svc_2(md_mn_set_and_class_t *msc, struct svc_req *rqstp) -{ - int *retval; - set_t setno = msc->msc_set; - md_mn_msgclass_t class = msc->msc_class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_md_mn_set_and_class_t, (caddr_t)msc); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - /* Perform some range checking */ - if ((setno == 0) || (setno >= MD_MAXSETS) || - (class < MD_MSG_CLASS0) || (class >= MD_MN_NCLASSES)) { - *retval = MDMNE_EINVAL; - return (retval); - } - commd_debug(MD_MMV_MISC, "unlock: set=%d, class=%d\n", setno, class); - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - if (class != MD_MSG_CLASS0) { - mdmn_mark_class_unlocked(setno, class); - } else { - /* MD_MSG_CLASS0 is used as a wild card for all classes */ - for (class = MD_MSG_CLASS1; class < MD_MN_NCLASSES; class++) { - mdmn_mark_class_unlocked(setno, class); - } - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - - *retval = MDMNE_ACK; - return (retval); -} - -/* - * mdmn_comm_suspend_svc_2(setno, class) - * - * Drain all outstanding messages for a given set/class combination - * and don't allow new messages to be processed. - * - * Special messages of class MD_MSG_CLASS0 can never be locked. - * e.g. MD_MN_MSG_VERBOSITY - * - * 1 <= setno < MD_MAXSETS or setno == MD_COMM_ALL_SETS - * 1 <= class < MD_MN_NCLASSES or class == MD_COMM_ALL_CLASSES - * - * If class _is_not_ MD_COMM_ALL_CLASSES, then we simply mark this - * one class as being suspended. - * If messages for this class are currently on their way, - * MDMNE_SET_NOT_DRAINED is returned. Otherwise MDMNE_ACK is returned. - * - * If class _is_ MD_COMM_ALL_CLASSES we drain all classes of this set. - * Messages must be generated in ascending order. - * This means, a message cannot create submessages with the same or lower class. - * Draining messages must go from 1 to NCLASSES in order to ensure we don't - * generate a hanging situation here. - * We mark class 1 as being suspended. - * if the class is not busy, we proceed with class 2 - * and so on - * if a class *is* busy, we cannot continue here, but return - * MDMNE_SET_NOT_DRAINED. - * We expect the caller to hold on for some seconds and try again. - * When that message, that held the class busy is done in - * mdmn_master_process_msg(), mdmn_mark_class_unbusy() called. - * There it is checked if the class is about to drain. - * In that case it tries to drain all higher classes there. - * - * If setno is MD_COMM_ALL_SETS then we perform this on all possible sets. - * In that case we return MDMNE_SET_NOT_DRAINED if not all sets are - * completely drained. - * - * Returns: - * MDMNE_ACK on sucess (set is drained, no outstanding messages) - * MDMNE_SET_NOT_DRAINED if drain process is started, but there are - * still outstanding messages for this set(s) - * MDMNE_EINVAL if setno is out of range - * MDMNE_NOT_JOINED if the set is not yet initialized on this node - */ - -/* ARGSUSED */ -int * -mdmn_comm_suspend_svc_2(md_mn_set_and_class_t *msc, struct svc_req *rqstp) -{ - int *retval; - int failure = 0; - set_t startset, endset; - set_t setno = msc->msc_set; - md_mn_msgclass_t oclass = msc->msc_class; -#ifdef NOT_YET_NEEDED - uint_t flags = msc->msc_flags; -#endif /* NOT_YET_NEEDED */ - md_mn_msgclass_t class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_md_mn_set_and_class_t, (caddr_t)msc); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - commd_debug(MD_MMV_MISC, "suspend: called for set=%d class=%d\n", - setno, oclass); - - /* Perform some range checking */ - if (setno >= MD_MAXSETS) { - *retval = MDMNE_EINVAL; - commd_debug(MD_MMV_MISC, "suspend: returning MDMNE_EINVAL\n"); - return (retval); - } - - /* setno == MD_COMM_ALL_SETS means: we walk thru all possible sets. */ - if (setno == MD_COMM_ALL_SETS) { - startset = 1; - endset = MD_MAXSETS - 1; - } else { - startset = setno; - endset = setno; - } - - for (setno = startset; setno <= endset; setno++) { - /* Here we need the mutexes for the set to be setup */ - if (md_mn_set_inited[setno] != MDMN_SET_MUTEXES) { - (void) mdmn_init_set(setno, MDMN_SET_MUTEXES); - } - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - /* shall we drain all classes of this set? */ - if (oclass == MD_COMM_ALL_CLASSES) { - for (class = 1; class < MD_MN_NCLASSES; class ++) { - commd_debug(MD_MMV_MISC, - "suspend: suspending set %d, class %d\n", - setno, class); - *retval = mdmn_mark_class_suspended(setno, - class, MDMN_SUSPEND_ALL); - if (*retval == MDMNE_SET_NOT_DRAINED) { - failure++; - } - } - } else { - /* only drain one specific class */ - commd_debug(MD_MMV_MISC, - "suspend: suspending set=%d class=%d\n", - setno, oclass); - *retval = mdmn_mark_class_suspended(setno, oclass, - MDMN_SUSPEND_1); - if (*retval == MDMNE_SET_NOT_DRAINED) { - failure++; - } - } - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - } - /* If one or more sets are not entirely drained, failure is non-zero */ - if (failure != 0) { - *retval = MDMNE_SET_NOT_DRAINED; - commd_debug(MD_MMV_MISC, - "suspend: returning MDMNE_SET_NOT_DRAINED\n"); - } else { - *retval = MDMNE_ACK; - } - - return (retval); -} - -/* - * mdmn_comm_resume_svc_2(setno, class) - * - * Resume processing messages for a given set. - * This incorporates the repeal of a previous suspend operation. - * - * 1 <= setno < MD_MAXSETS or setno == MD_COMM_ALL_SETS - * 1 <= class < MD_MN_NCLASSES or class == MD_COMM_ALL_CLASSES - * - * If class _is_not_ MD_COMM_ALL_CLASSES, then we simply mark this - * one class as being resumed. - * - * If class _is_ MD_COMM_ALL_CLASSES we resume all classes of this set. - * - * If setno is MD_COMM_ALL_SETS then we perform this on all possible sets. - * - * If both setno is MD_COMM_ALL_SETS and class is MD_COMM_ALL_CLASSES we also - * reset any ABORT flag from the global state. - * - * Returns: - * MDMNE_ACK on sucess (resuming an unlocked set is Ok) - * MDMNE_EINVAL if setno is out of range - * MDMNE_NOT_JOINED if the set is not yet initialized on this node - */ -/* ARGSUSED */ -int * -mdmn_comm_resume_svc_2(md_mn_set_and_class_t *msc, struct svc_req *rqstp) -{ - int *retval; - set_t startset, endset; - set_t setno = msc->msc_set; - md_mn_msgclass_t oclass = msc->msc_class; - uint_t flags = msc->msc_flags; - md_mn_msgclass_t class; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_md_mn_set_and_class_t, (caddr_t)msc); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - commd_debug(MD_MMV_MISC, "resume: called for set=%d class=%d\n", - setno, oclass); - - /* Perform some range checking */ - if (setno > MD_MAXSETS) { - *retval = MDMNE_EINVAL; - return (retval); - } - - if (setno == MD_COMM_ALL_SETS) { - startset = 1; - endset = MD_MAXSETS - 1; - if (oclass == MD_COMM_ALL_CLASSES) { - /* This is the point where we "unabort" the commd */ - commd_debug(MD_MMV_MISC, "resume: resetting ABORT\n"); - md_commd_global_state &= ~MD_CGS_ABORTED; - } - } else { - startset = setno; - endset = setno; - } - - for (setno = startset; setno <= endset; setno++) { - - /* Here we need the mutexes for the set to be setup */ - if ((md_mn_set_inited[setno] & MDMN_SET_MUTEXES) == 0) { - (void) mdmn_init_set(setno, MDMN_SET_MUTEXES); - } - - (void) mutex_lock(&mdmn_busy_mutex[setno]); - - if (oclass == MD_COMM_ALL_CLASSES) { - int end_class = 1; - /* - * When SUSPENDing all classes, we go - * from 1 to MD_MN_NCLASSES-1 - * The correct reverse action is RESUMing - * from MD_MN_NCLASSES-1 to 1 (or 2) - */ - - if (flags & MD_MSCF_DONT_RESUME_CLASS1) { - end_class = 2; - } - - /* - * Then mark all classes of this set as no longer - * suspended. This supersedes any previous suspend(1) - * calls and resumes the set entirely. - */ - for (class = MD_MN_NCLASSES - 1; class >= end_class; - class --) { - commd_debug(MD_MMV_MISC, - "resume: resuming set=%d class=%d\n", - setno, class); - mdmn_mark_class_resumed(setno, class, - (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)); - } - } else { - /* - * In this case only one class is marked as not - * suspended. If a suspend(all) is currently active for - * this set, this class will still be suspended. - * That state will be cleared by a suspend(all) - * (see above) - */ - commd_debug(MD_MMV_MISC, - "resume: resuming set=%d class=%d\n", - setno, oclass); - mdmn_mark_class_resumed(setno, oclass, MDMN_SUSPEND_1); - } - - (void) mutex_unlock(&mdmn_busy_mutex[setno]); - } - - *retval = MDMNE_ACK; - return (retval); -} -/* ARGSUSED */ -int * -mdmn_comm_reinit_set_svc_2(set_t *setnop, struct svc_req *rqstp) -{ - int *retval; - md_mnnode_desc *node; - set_t setno = *setnop; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_set_t, (caddr_t)setnop); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - commd_debug(MD_MMV_MISC, "reinit: set=%d\n", setno); - - (void) rw_rdlock(&set_desc_rwlock[setno]); - /* - * We assume, that all messages have been suspended previously. - * - * As we are modifying lots of clients here we grab the client_rwlock - * in writer mode. This ensures, no new messages come in. - */ - (void) rw_wrlock(&client_rwlock[setno]); - /* This set is no longer initialized */ - - if ((set_descriptor[setno] != NULL) && - (md_mn_set_inited[setno] & MDMN_SET_NODES)) { - /* destroy all rpc clients from this set */ - for (node = set_descriptor[setno]->sd_nodelist; node; - node = node->nd_next) { - /* - * Since the CLIENT for ourself will be recreated - * shortly, and this node is guaranteed to be - * there after a reconfig, there's no reason to go - * through destroying it. It also avoids an issue - * with calling clnt_create() later from within the - * server thread, which can effectively deadlock - * itself due to RPC design limitations. - */ - if (node == set_descriptor[setno]->sd_mn_mynode) - continue; - mdmn_clnt_destroy(client[setno][node->nd_nodeid]); - if (client[setno][node->nd_nodeid] != (CLIENT *)NULL) { - client[setno][node->nd_nodeid] = (CLIENT *)NULL; - } - } - md_mn_set_inited[setno] &= ~MDMN_SET_NODES; - } - - commd_debug(MD_MMV_MISC, "reinit: done init_set(%d)\n", setno); - - (void) rw_unlock(&client_rwlock[setno]); - (void) rw_unlock(&set_desc_rwlock[setno]); - *retval = MDMNE_ACK; - return (retval); -} - -/* - * This is just an interface for testing purpose. - * Here we can disable single message types. - * If we block a message type, this is valid for all MN sets. - * If a message arrives later, and it's message type is blocked, it will - * be returned immediately with MDMNE_CLASS_LOCKED, which causes the sender to - * resend this message over and over again. - */ - -/* ARGSUSED */ -int * -mdmn_comm_msglock_svc_2(md_mn_type_and_lock_t *mmtl, struct svc_req *rqstp) -{ - int *retval; - md_mn_msgtype_t type = mmtl->mmtl_type; - uint_t lock = mmtl->mmtl_lock; - - retval = Malloc(sizeof (int)); - - /* check if the global initialization is done */ - if ((md_commd_global_state & MD_CGS_INITED) == 0) { - global_init(); - } - - /* is this rpc request coming from the local node ? */ - if (check_license(rqstp, 0) == FALSE) { - xdr_free(xdr_md_mn_type_and_lock_t, (caddr_t)mmtl); - *retval = MDMNE_RPC_FAIL; - return (retval); - } - - /* Perform some range checking */ - if ((type == 0) || (type >= MD_MN_NMESSAGES)) { - *retval = MDMNE_EINVAL; - return (retval); - } - - commd_debug(MD_MMV_MISC, "msglock: type=%d, lock=%d\n", type, lock); - msgtype_lock_state[type] = lock; - - *retval = MDMNE_ACK; - return (retval); -} diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_service.c b/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_service.c deleted file mode 100644 index d1d6dbd52d31..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_commd_service.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include /* getenv, exit */ -#include -#include -#include -#include -#include -#include -#include /* rlimit */ -#include -#include - -#ifdef DEBUG -#define RPC_SVC_FG -#endif - -/* - * This means we shutdown rpc.mdcommd at some time in the window - * after 1201 seconds and before 2400 seconds of inactivity. - */ -#define _RPCSVC_CLOSEDOWN 2400 - -#ifdef RPC_SVC_FG -static int _rpcpmstart; /* Started by a port monitor ? */ -#endif /* RPC_SVC_FG */ -/* States a server can be in wrt request */ - -#define _IDLE 0 -#define _SERVED 1 - -static int _rpcsvcstate = _IDLE; /* Set when a request is serviced */ -static int _rpcsvccount = 0; /* Number of requests being serviced */ - -extern int mdmn_send_svc_2(); -extern int *mdmn_work_svc_2(); -extern int *mdmn_wakeup_initiator_svc_2(); -extern int *mdmn_wakeup_master_svc_2(); -extern int *mdmn_comm_lock_svc_2(); -extern int *mdmn_comm_unlock_svc_2(); -extern int *mdmn_comm_suspend_svc_2(); -extern int *mdmn_comm_resume_svc_2(); -extern int *mdmn_comm_reinit_set_svc_2(); -extern int *mdmn_comm_msglock_svc_2(); - - -static void -_msgout(msg) - char *msg; -{ -#ifdef RPC_SVC_FG - if (_rpcpmstart) - syslog(LOG_ERR, "%s", msg); - else - (void) fprintf(stderr, "%s\n", msg); -#else - syslog(LOG_ERR, "%s", msg); -#endif -} - -static void -closedown(void) -{ - if (_rpcsvcstate == _IDLE && _rpcsvccount == 0) { - int size; - int i, openfd = 0; - - size = svc_max_pollfd; - for (i = 0; i < size && openfd < 2; i++) - if (svc_pollfd[i].fd >= 0) - openfd++; - if (openfd <= 1) - exit(0); - } else - _rpcsvcstate = _IDLE; - - (void) signal(SIGALRM, (void(*)()) closedown); - (void) alarm(_RPCSVC_CLOSEDOWN/2); -} - -static void -mdmn_commd_2(rqstp, transp) - struct svc_req *rqstp; - register SVCXPRT *transp; -{ - union { - md_mn_msg_t mdmn_send_1_arg; - md_mn_msg_t mdmn_work_1_arg; - md_mn_result_t mdmn_wakeup_1_arg; - md_mn_msgclass_t mdmn_comm_lock_1_arg; - md_mn_msgclass_t mdmn_comm_unlock_1_arg; - uint_t mdmn_comm_reinit_1_arg; - } argument; - char *result; - bool_t (*_xdr_argument)(), (*_xdr_result)(); - char *(*local)(); - int free_result = 0; - - _rpcsvccount++; - switch (rqstp->rq_proc) { - case NULLPROC: - (void) svc_sendreply(transp, xdr_void, - (char *)NULL); - _rpcsvccount--; - _rpcsvcstate = _SERVED; - svc_done(transp); - return; - - case mdmn_send: - _xdr_argument = xdr_md_mn_msg_t; - _xdr_result = xdr_md_mn_result_t; - (void) memset((char *)&argument, 0, sizeof (argument)); - if (!svc_getargs(transp, _xdr_argument, (caddr_t)&argument)) { - svcerr_decode(transp); - svc_done(transp); - _rpcsvccount--; - _rpcsvcstate = _SERVED; - return; - } - /* - * mdmn_send_2 will not always do a sendreply. - * it will register in a table and let the mdmn_wakeup1 - * do the sendreply for that call. - * in order to register properly we need the transp handle - * If we get a 0 back from mdmn_send_svc_2() we have no pending - * RPC in-flight, so we drop the service count. - */ - if (mdmn_send_svc_2((md_mn_msg_t *)&argument, rqstp) == 0) { - _rpcsvccount--; - _rpcsvcstate = _SERVED; - svc_done(rqstp->rq_xprt); - } - - return; /* xdr_free is called by mdmn_wakeup_initiator_svc_2 */ - - case mdmn_work: - _xdr_argument = xdr_md_mn_msg_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_work_svc_2; - free_result = 1; - break; - - case mdmn_wakeup_master: - _xdr_argument = xdr_md_mn_result_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_wakeup_master_svc_2; - free_result = 1; - break; - - case mdmn_wakeup_initiator: - /* - * We must have had an in-flight RPC request to get here, - * so drop the in-flight count. - */ - _xdr_argument = xdr_md_mn_result_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_wakeup_initiator_svc_2; - free_result = 1; - _rpcsvccount--; - break; - - case mdmn_comm_lock: - _xdr_argument = xdr_md_mn_set_and_class_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_lock_svc_2; - break; - - case mdmn_comm_unlock: - _xdr_argument = xdr_md_mn_set_and_class_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_unlock_svc_2; - break; - - case mdmn_comm_suspend: - _xdr_argument = xdr_md_mn_set_and_class_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_suspend_svc_2; - break; - - case mdmn_comm_resume: - _xdr_argument = xdr_md_mn_set_and_class_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_resume_svc_2; - break; - - case mdmn_comm_reinit_set: - _xdr_argument = xdr_u_int; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_reinit_set_svc_2; - break; - - case mdmn_comm_msglock: - _xdr_argument = xdr_md_mn_type_and_lock_t; - _xdr_result = xdr_int; - local = (char *(*)()) mdmn_comm_msglock_svc_2; - break; - - default: - svcerr_noproc(transp); - _rpcsvccount--; - _rpcsvcstate = _SERVED; - svc_done(transp); - return; - } - (void) memset((char *)&argument, 0, sizeof (argument)); - if (!svc_getargs(transp, _xdr_argument, (caddr_t)&argument)) { - svcerr_decode(transp); - _rpcsvccount--; - _rpcsvcstate = _SERVED; - svc_done(transp); - return; - } - result = (*local)(&argument, rqstp); - if (_xdr_result && result != NULL && - !svc_sendreply(transp, _xdr_result, result)) { - svcerr_systemerr(transp); - } - if (!svc_freeargs(transp, _xdr_argument, (caddr_t)&argument)) { - _msgout(gettext("unable to free arguments")); - svc_done(transp); - exit(1); - } - - if (free_result == 1) { - free(result); - } - - svc_done(transp); - _rpcsvccount--; - _rpcsvcstate = _SERVED; -} - -/* - * atexit handler to flag the lack of commd to the kernel so that we don't - * panic due to RPC failures when the commd has been killed. - */ -static void -exit_commd() -{ - md_error_t ep = mdnullerror; - syslog(LOG_DAEMON | LOG_DEBUG, gettext("mdcommd exiting")); - (void) metaioctl(MD_MN_SET_COMMD_RUNNING, 0, &ep, "rpc.mdcommd"); -} - -/* ARGSUSED */ -int -main() -{ - pid_t pid; - int i; - md_error_t ep = mdnullerror; - int mode = RPC_SVC_MT_USER; - - (void) sigset(SIGPIPE, SIG_IGN); - - /* - * Attempt to set MT_USER behaviour for mdcommd service routines. - * If this isn't done, there is a possibility that the transport - * handle might be freed before the thread created by mdmn_send_svc_2 - * can use it. A consequence of this is that svc_done() must be - * called on the handle when it's no longer needed. - */ - if (rpc_control(RPC_SVC_MTMODE_SET, &mode) == FALSE) { - _msgout(gettext("cannot set MT_USER mode for RPC service")); - exit(1); - } - - /* - * If stdin looks like a TLI endpoint, we assume - * that we were started by a port monitor. If - * t_getstate fails with TBADF, this is not a - * TLI endpoint. - */ - if (t_getstate(0) != -1 || t_errno != TBADF) { - char *netid; - struct netconfig *nconf = NULL; - SVCXPRT *transp; - int pmclose; - -#ifdef RPC_SVC_FG - _rpcpmstart = 1; -#endif /* RPC_SVC_FG */ - openlog("mdmn_commd", LOG_PID, LOG_DAEMON); - - if ((netid = getenv("NLSPROVIDER")) == NULL) { - /* started from inetd */ - pmclose = 1; - } else { - if ((nconf = getnetconfigent(netid)) == NULL) - _msgout(gettext("cannot get transport info")); - - pmclose = (t_getstate(0) != T_DATAXFER); - } - if ((transp = svc_tli_create(0, nconf, NULL, 0, 0)) == NULL) { - _msgout(gettext("cannot create server handle")); - exit(1); - } - if (nconf) - freenetconfigent(nconf); - if (!svc_reg(transp, MDMN_COMMD, TWO, mdmn_commd_2, 0)) { - _msgout(gettext( - "unable to register (MDMN_COMMD, TWO).")); - exit(1); - } - - (void) atexit(exit_commd); - - if (pmclose) { - (void) signal(SIGALRM, (void(*)()) closedown); - (void) alarm(_RPCSVC_CLOSEDOWN/2); - } - - pid = getpid(); - (void) metaioctl(MD_MN_SET_COMMD_RUNNING, (void *)pid, &ep, - "rpc.mdcommd"); - svc_run(); - exit(1); - /* NOTREACHED */ - } else { -#ifndef RPC_SVC_FG -#pragma weak closefrom - /* LINTED */ - extern void closefrom(); - int size; - struct rlimit rl; - pid = fork(); - if (pid < 0) { - perror(gettext("cannot fork")); - exit(1); - } - if (pid) - exit(0); - if (closefrom != NULL) - closefrom(0); - else { - rl.rlim_max = 0; - (void) getrlimit(RLIMIT_NOFILE, &rl); - if ((size = rl.rlim_max) == 0) - exit(1); - for (i = 0; i < size; i++) - (void) close(i); - } - i = open("/dev/null", 2); - (void) dup2(i, 1); - (void) dup2(i, 2); - (void) setsid(); - openlog("mdmn_commd", LOG_PID, LOG_DAEMON); -#endif - } - if (!svc_create(mdmn_commd_2, MDMN_COMMD, TWO, "tcp")) { - _msgout(gettext("unable to create (MDMN_COMMD, TWO) for tcp.")); - exit(1); - } - - (void) atexit(exit_commd); - (void) metaioctl(MD_MN_SET_COMMD_RUNNING, (void *)1, &ep, - "rpc.mdcommd"); - - svc_run(); - _msgout(gettext("svc_run returned")); - return (1); -} diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.c b/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.c deleted file mode 100644 index 84d2fd48fef7..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include "meta.h" -#include "mdmn_subr.h" - -extern int mdmn_init_set(set_t setno, int todo); - -uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES]; -mutex_t mdmn_busy_mutex[MD_MAXSETS]; -cond_t mdmn_busy_cv[MD_MAXSETS]; - - -/* the wakeup table for the initiator's side */ -mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES]; - -/* the wakeup table for the master */ -mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES]; - -/* List of licensed ip addresses */ -licensed_ip_t licensed_nodes[NNODES]; - -/* speed up the search for licensed ip addresses */ -md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */ - -/* - * Check if a given set/class combination is currently in use - * If in use, returns TRUE - * Otherwise returns FALSE - * - * Must be called with mdmn_busy_mutex held - */ -bool_t -mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class) -{ - if (mdmn_busy[setno][class] & MDMN_BUSY) { - return (TRUE); - } else { - return (FALSE); - } -} - -/* - * Mark a given set/class combination as currently in use - * If the class was already in use, returns FALSE - * Otherwise returns TRUE - * - * So mdmn_mark_class_busy can be used like - * if (mdmn_mark_class_busy(setno, class) == FALSE) - * failure; - * else - * success; - * - * Must be called with mdmn_busy_mutex held - */ -bool_t -mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class) -{ - if (mdmn_busy[setno][class] & MDMN_BUSY) { - return (FALSE); - } else { - mdmn_busy[setno][class] |= MDMN_BUSY; - commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n", - setno, class); - return (TRUE); - } -} - -/* - * Mark a given set/class combination as currently available - * Always succeeds, thus void. - * - * If this class is marked MDMN_SUSPEND_ALL, we are in the middle of - * draining all classes of this set. - * We have to mark class+1 as MDMN_SUSPEND_ALL too. - * If class+2 wasn't busy, we proceed with class+2, and so on - * If any class is busy, we return. - * Then the drain process will be continued by the mdmn_mark_class_unbusy() of - * that busy class - */ -void -mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class) -{ - commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class); - mdmn_busy[setno][class] &= ~MDMN_BUSY; - /* something changed, inform threads waiting for that */ - (void) cond_signal(&mdmn_busy_cv[setno]); - - if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) { - return; - } - - while (++class < MD_MN_NCLASSES) { - commd_debug(MD_MMV_MISC, - "unbusy: suspending set=%d, class=%d\n", setno, class); - if (mdmn_mark_class_suspended(setno, class, MDMN_SUSPEND_ALL) - == MDMNE_SET_NOT_DRAINED) { - break; - } - } - -} - - -/* - * Check if a given set/class combination is locked. - */ -bool_t -mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class) -{ - if (mdmn_busy[setno][class] & MDMN_LOCKED) { - return (TRUE); - } else { - return (FALSE); - } -} - -/* - * Mark a given set/class combination as locked. - * No checking is done here, so routine can be void. - * Locking a locked set/class is ok. - * - * Must be called with mdmn_busy_mutex held - */ -void -mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class) -{ - mdmn_busy[setno][class] |= MDMN_LOCKED; -} - -/* - * Mark a given set/class combination as unlocked. - * No checking is done here, so routine can be void. - * Unlocking a unlocked set/class is ok. - * - * Must be called with mdmn_busy_mutex held - */ -void -mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class) -{ - mdmn_busy[setno][class] &= ~MDMN_LOCKED; -} - -/* - * Suspend a set/class combination - * - * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL. - * If only one class is about to be drained susptype is MDMN_SUSPEND_1. - * - * Returns: - * MDMNE_ACK if there are no outstanding messages - * MDMNE_SET_NOT_DRAINED otherwise - * - * Must be called with mdmn_busy_mutex held for this set. - */ -int -mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype) -{ - /* - * We use the mdmn_busy array to mark this set is suspended. - */ - mdmn_busy[setno][class] |= susptype; - - /* - * If there are outstanding messages for this set/class we - * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK - */ - if (mdmn_is_class_busy(setno, class) == TRUE) { - return (MDMNE_SET_NOT_DRAINED); - } - return (MDMNE_ACK); -} - -/* - * Resume operation for a set/class combination after it was - * previously suspended - * - * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class - * then susptype will be MDMN_SUSPEND_1 - * Otherwise to resume all classes of one set, - * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1) - * - * Always succeeds, thus void. - * - * Must be called with mdmn_busy_mutex held for this set. - */ -void -mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype) -{ - /* simply the reverse operation to mdmn_mark_set_drained() */ - mdmn_busy[setno][class] &= ~susptype; -} - -/* - * Check if a drain command was issued for this set/class combination. - * - * Must be called with mdmn_busy_mutex held for this set. - */ -bool_t -mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class) -{ - if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) { - return (TRUE); - } else { - return (FALSE); - } -} - -/* - * Put a result into the wakeup table for the master - * It's ensured that the msg id from the master_table entry and from - * result are matching - */ -void -mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class, - md_mn_result_t *res) -{ - mdmn_master_table[setno][class].wtm_result = res; -} -void -mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) -{ - MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id)); -} - -void -mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class, - md_mn_nodeid_t nid) -{ - mdmn_master_table[setno][class].wtm_addr = nid; -} - - -md_mn_result_t * -mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class) -{ - return (mdmn_master_table[setno][class].wtm_result); -} - -void -mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) -{ - MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id); -} - -cond_t * -mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class) -{ - return (&(mdmn_master_table[setno][class].wtm_cv)); -} - -mutex_t * -mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class) -{ - return (&(mdmn_master_table[setno][class].wtm_mx)); -} - -md_mn_nodeid_t -mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class) -{ - return (mdmn_master_table[setno][class].wtm_addr); -} - - - -/* here come the functions dealing with the wakeup table for the initiators */ - - -void -mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class, - md_mn_msg_t *msg, SVCXPRT *transp) -{ - uint_t nnodes = set_descriptor[setno]->sd_mn_numnodes; - time_t timeout = mdmn_get_timeout(msg->msg_type); - - - MSGID_COPY(&(msg->msg_msgid), - &(mdmn_initiator_table[setno][class].wti_id)); - mdmn_initiator_table[setno][class].wti_transp = transp; - mdmn_initiator_table[setno][class].wti_args = (char *)msg; - - /* - * as the point in time where we want to be guaranteed to be woken up - * again, we chose the - * current time + nnodes times the timeout value for the message type - */ - mdmn_initiator_table[setno][class].wti_time = - time((time_t *)NULL) + (nnodes * timeout); -} - -/* - * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY - * Otherwise return MDMNE_ACK - */ -int -mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class) -{ - if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) && - (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL)) - return (MDMNE_ACK); - return (MDMNE_CLASS_BUSY); -} - -/* - * Remove an entry from the initiator table entirely, - * This must be done with mutex held. - */ -void -mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class) -{ - mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u; - mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL; - mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL; - mdmn_initiator_table[setno][class].wti_args = (char *)0; - mdmn_initiator_table[setno][class].wti_time = (time_t)0; -} - -void -mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class, - md_mn_msgid_t *mid) -{ - MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid); -} - -SVCXPRT * -mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class) -{ - return (mdmn_initiator_table[setno][class].wti_transp); -} - -char * -mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class) -{ - return (mdmn_initiator_table[setno][class].wti_args); -} - -mutex_t * -mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class) -{ - return (&(mdmn_initiator_table[setno][class].wti_mx)); -} - -time_t -mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class) -{ - return (mdmn_initiator_table[setno][class].wti_time); -} - -extern uint_t md_commd_global_verb; /* global bitmask for debug classes */ -extern FILE *commdout; /* debug output file for the commd */ -extern hrtime_t __savetime; - - -/* - * Print debug messages to the terminal or to syslog - * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog), - * even if md_commd_global_verb is zero. - * - * Otherwise the correct bit must be set in the bitmask md_commd_global_verb - */ -void -commd_debug(uint_t debug_class, const char *message, ...) -{ - va_list ap; - - /* Is this a message for syslog? */ - if (debug_class == MD_MMV_SYSLOG) { - - va_start(ap, message); - (void) vsyslog(LOG_WARNING, message, ap); - va_end(ap); - } else { - /* Is this debug_class set in the global verbosity state? */ - if ((md_commd_global_verb & debug_class) == 0) { - return; - } - /* Is our output file already functioning? */ - if (commdout == NULL) { - return; - } - /* Are timestamps activated ? */ - if (md_commd_global_verb & MD_MMV_TIMESTAMP) { - /* print time since last TRESET in usecs */ - (void) fprintf(commdout, "[%s]", - meta_print_hrtime(gethrtime() - __savetime)); - } - /* Now print the real message */ - va_start(ap, message); - (void) vfprintf(commdout, message, ap); - va_end(ap); - } -} - - -void -dump_hex(uint_t debug_class, unsigned int *x, int cnt) -{ - cnt /= sizeof (unsigned int); - while (cnt--) { - commd_debug(debug_class, "0x%8x ", *x++); - if (cnt % 4) - continue; - commd_debug(debug_class, "\n"); - } - commd_debug(debug_class, "\n"); -} - -/* debug output: dump a message */ -void -dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg) -{ - commd_debug(dbc, "%s &msg = 0x%x\n", prefix, (int)msg); - commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, - MSGID_ELEMS(msg->msg_msgid)); - commd_debug(dbc, "%s sender = %d\n", prefix, msg->msg_sender); - commd_debug(dbc, "%s flags = 0x%x\n", prefix, msg->msg_flags); - commd_debug(dbc, "%s setno = %d\n", prefix, msg->msg_setno); - commd_debug(dbc, "%s recipient = %d\n", prefix, msg->msg_recipient); - commd_debug(dbc, "%s type = %d\n", prefix, msg->msg_type); - commd_debug(dbc, "%s size = %d\n", prefix, msg->msg_event_size); - if (msg->msg_event_size) { - commd_debug(dbc, "%s data =\n", prefix); - dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data, - msg->msg_event_size); - } -} - -/* debug output: dump a result structure */ -void -dump_result(uint_t dbc, char *prefix, md_mn_result_t *res) -{ - commd_debug(dbc, "%s &res = 0x%x\n", prefix, (int)res); - commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, - MSGID_ELEMS(res->mmr_msgid)); - commd_debug(dbc, "%s setno = %d\n", prefix, res->mmr_setno); - commd_debug(dbc, "%s type = %d\n", prefix, res->mmr_msgtype); - commd_debug(dbc, "%s flags = 0x%x\n", prefix, res->mmr_flags); - commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state); - commd_debug(dbc, "%s exitval = %d\n", prefix, res->mmr_exitval); - commd_debug(dbc, "%s out_size = %d\n", prefix, res->mmr_out_size); - if (res->mmr_out_size) - commd_debug(dbc, "%s out = %s\n", prefix, res->mmr_out); - commd_debug(dbc, "%s err_size = %d\n", prefix, res->mmr_err_size); - if (res->mmr_err_size) - commd_debug(dbc, "%s err = %s\n", prefix, res->mmr_err); -} - - -/* - * Here we find out, where to store or find the results for a given msg. - * - * Per set we have a pointer to a three dimensional array: - * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES] - * So, for every possible node and for every possible class we can store - * MAX_SUBMESSAGES results. - * the way to find the correct index is - * submessage + - * class * MAX_SUBMESSAGES + - * nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES. - * - * To find the correct address the index has to be multiplied - * by the size of one entry. - */ -static md_mn_mce_t * -mdmn_get_mce_by_msg(md_mn_msg_t *msg) -{ - set_t setno = msg->msg_setno; - int nodeid = msg->msg_msgid.mid_nid; - int submsg = msg->msg_msgid.mid_smid; - int mct_index; - off_t mct_offset; - md_mn_msgclass_t class; - - if (mct[setno] != NULL) { - if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) { - return ((md_mn_mce_t *)MDMN_MCT_ERROR); - } - } - - if (submsg == 0) { - class = mdmn_get_message_class(msg->msg_type); - } else { - class = msg->msg_msgid.mid_oclass; - } - - mct_index = submsg + class * MAX_SUBMESSAGES + - nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES; - - mct_offset = mct_index * sizeof (md_mn_mce_t); - - /* LINTED Pointer alignment */ - return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset)); - - /* - * the lint clean version would be: - * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index); - * :-) - */ -} - -/* - * mdmn_mark_completion(msg, result, flag) - * Stores the result of this message into the mmaped memory MCT[setno] - * In case the same message comes along a second time we will know that - * this message has already been processed and we can deliver the - * results immediately. - * - * Before a message handler is called, the message in the MCT is flagged - * as currently being processed (flag == MDMN_MCT_IN_PROGRESS). - * This we need so we don't start a second handler for the same message. - * - * After a message handler is completed, this routine is called with - * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT. - * As MCT[setno] is memory mapped to disks, this information is persistent - * even across a crash of the commd. - * It doesn't have to be persistent across a reboot, though. - * - * Returns MDMN_MCT_DONE in case of success - * Returns MDMN_MCT_ERROR in case of error creating the mct - */ -int -mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag) -{ - md_mn_mce_t *mce; - uint_t offset_in_page; - - mce = mdmn_get_mce_by_msg(msg); - if (mce == (md_mn_mce_t *)-1) { - return (MDMN_MCT_ERROR); - } - offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE); - - (void) memset(mce, 0, sizeof (md_mn_mce_t)); - - MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid); - if (flag == MDMN_MCT_IN_PROGRESS) { - mce->mce_flags = MDMN_MCT_IN_PROGRESS; - goto mmc_out; - } - - /* - * In case the message flags indicate that the result should not be - * stored in the MCT, we return a MDMN_MCT_NOT_DONE, - * so the message will be processed at any rate, - * even if we process this message twice. - * this makes sense if the result of the message is a dynamic status - * and might have changed meanwhile. - */ - if (msg->msg_flags & MD_MSGF_NO_MCT) { - return (MDMN_MCT_DONE); - } - - /* This msg is no longer in progress */ - mce->mce_flags = MDMN_MCT_DONE; - - mce->mce_result.mmr_msgtype = result->mmr_msgtype; - mce->mce_result.mmr_setno = result->mmr_setno; - mce->mce_result.mmr_flags = result->mmr_flags; - mce->mce_result.mmr_sender = result->mmr_sender; - mce->mce_result.mmr_failing_node = result->mmr_failing_node; - mce->mce_result.mmr_comm_state = result->mmr_comm_state; - mce->mce_result.mmr_exitval = result->mmr_exitval; - - /* if mmr_exitval is zero, we store stdout, otherwise stderr */ - if (result->mmr_exitval == 0) { - if (result->mmr_out_size > 0) { - (void) memcpy(mce->mce_data, result->mmr_out, - result->mmr_out_size); - mce->mce_result.mmr_out_size = result->mmr_out_size; - } - } else { - if (result->mmr_err_size > 0) { - mce->mce_result.mmr_err_size = result->mmr_err_size; - (void) memcpy(mce->mce_data, result->mmr_err, - result->mmr_err_size); - } - } - - dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result); - -mmc_out: - /* now flush this entry to disk */ - (void) msync((caddr_t)mce - offset_in_page, - sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC); - return (MDMN_MCT_DONE); -} - -/* - * mdmn_check_completion(msg, resultp) - * checks if msg has already been processed on this node, and if so copies - * the stored result to resultp. - * - * returns MDMN_MCT_DONE and the result filled out acurately in case the - * msg has already been processed before - * returns MDMN_MCT_NOT_DONE if the message has not been processed before - * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed - * This can only occur on a slave node. - * return MDMN_MCT_ERROR in case of error creating the mct - */ -int -mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result) -{ - md_mn_mce_t *mce; - size_t outsize; - size_t errsize; - - mce = mdmn_get_mce_by_msg(msg); - if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) { - return (MDMN_MCT_ERROR); /* what to do in that case ? */ - } - if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) { - /* is the message completed, or in progress? */ - if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) { - return (MDMN_MCT_IN_PROGRESS); - } - /* - * See comment on MD_MSGF_NO_MCT above, if this flag is set - * for a message no result was stored and so the message has - * to be processed no matter if this is the 2nd time then. - */ - if (msg->msg_flags & MD_MSGF_NO_MCT) { - return (MDMN_MCT_NOT_DONE); - } - - /* Paranoia check: mce_flags must be MDMN_MCT_DONE here */ - if ((mce->mce_flags & MDMN_MCT_DONE) == 0) { - commd_debug(MD_MMV_ALL, - "mdmn_check_completion: msg not done and not in " - "progress! ID = (%d, 0x%llx-%d)\n", - MSGID_ELEMS(msg->msg_msgid)); - return (MDMN_MCT_NOT_DONE); - } - /* - * Already processed. - * Copy saved results data; - * return only a pointer to any output. - */ - MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid); - result->mmr_msgtype = mce->mce_result.mmr_msgtype; - result->mmr_setno = mce->mce_result.mmr_setno; - result->mmr_flags = mce->mce_result.mmr_flags; - result->mmr_sender = mce->mce_result.mmr_sender; - result->mmr_failing_node = mce->mce_result.mmr_failing_node; - result->mmr_comm_state = mce->mce_result.mmr_comm_state; - result->mmr_exitval = mce->mce_result.mmr_exitval; - result->mmr_err = NULL; - result->mmr_out = NULL; - outsize = result->mmr_out_size = mce->mce_result.mmr_out_size; - errsize = result->mmr_err_size = mce->mce_result.mmr_err_size; - /* - * if the exit val is zero only stdout was stored (if any) - * otherwise only stderr was stored (if any) - */ - if (result->mmr_exitval == 0) { - if (outsize != 0) { - result->mmr_out = Zalloc(outsize); - (void) memcpy(result->mmr_out, mce->mce_data, - outsize); - } - } else { - if (errsize != 0) { - result->mmr_err = Zalloc(errsize); - (void) memcpy(result->mmr_err, mce->mce_data, - errsize); - } - } - commd_debug(MD_MMV_MISC, - "mdmn_check_completion: msg already processed \n"); - dump_result(MD_MMV_MISC, "mdmn_check_completion", result); - return (MDMN_MCT_DONE); - } - commd_debug(MD_MMV_MISC, - "mdmn_check_completion: msg not yet processed\n"); - return (MDMN_MCT_NOT_DONE); -} - - - -/* - * check_license(rqstp, chknid) - * - * Is this RPC request sent from a licensed host? - * - * If chknid is non-zero, the caller of check_license() knows the ID of - * the sender. Then we check just the one entry of licensed_nodes[] - * - * If chknid is zero, the sender is not known. In that case the sender must be - * the local node. - * - * If the host is licensed, return TRUE, else return FALSE - */ -bool_t -check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid) -{ - char buf[INET6_ADDRSTRLEN]; - void *caller = NULL; - in_addr_t caller_ipv4; - in6_addr_t caller_ipv6; - struct sockaddr *ca; - - - ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf; - - if (ca->sa_family == AF_INET) { - caller_ipv4 = - ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr; - caller = (void *)&caller_ipv4; - - if (chknid == 0) { - /* check against local node */ - if (caller_ipv4 == htonl(INADDR_LOOPBACK)) { - return (TRUE); - - } - } else { - /* check against one specific node */ - if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) && - (licensed_nodes[chknid].lip_family == AF_INET)) { - return (TRUE); - } else { - commd_debug(MD_MMV_MISC, - "Bad attempt from %x ln[%d]=%x\n", - caller_ipv4, chknid, - licensed_nodes[chknid].lip_ipv4); - } - } - } else if (ca->sa_family == AF_INET6) { - caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr; - caller = (void *)&caller_ipv6; - - if (chknid == 0) { - /* check against local node */ - if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) { - return (TRUE); - - } - } else { - /* check against one specific node */ - if (IN6_ARE_ADDR_EQUAL(&caller_ipv6, - &(licensed_nodes[chknid].lip_ipv6)) && - (licensed_nodes[chknid].lip_family == AF_INET6)) { - return (TRUE); - } - } - } - /* if we are here, we were contacted by an unlicensed node */ - commd_debug(MD_MMV_SYSLOG, - "Bad attempt to contact rpc.mdcommd from %s\n", - caller ? - inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) : - "unknown"); - - return (FALSE); -} - -/* - * Add a node to the list of licensed nodes. - * - * Only IPv4 is currently supported. - * for IPv6, we need to change md_mnnode_desc. - */ -void -add_license(md_mnnode_desc *node) -{ - md_mn_nodeid_t nid = node->nd_nodeid; - char buf[INET6_ADDRSTRLEN]; - - /* - * If this node is not yet licensed, do it now. - * For now only IPv4 addresses are supported. - */ - commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", - node->nd_priv_ic, nid, - inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, - buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); - - if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) { - licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */ - licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic); - /* keep track of the last entry for faster search */ - if (nid > maxlicnodes) - maxlicnodes = nid; - - } - /* in any case bump up the reference count */ - licensed_nodes[nid].lip_cnt++; -} - -/* - * lower the reference count for one node. - * If that drops to zero, remove the node from the list of licensed nodes - * - * Only IPv4 is currently supported. - * for IPv6, we need to change md_mnnode_desc. - */ -void -rem_license(md_mnnode_desc *node) -{ - md_mn_nodeid_t nid = node->nd_nodeid; - char buf[INET6_ADDRSTRLEN]; - - commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", - node->nd_priv_ic, nid, - inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf, - INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); - - assert(licensed_nodes[nid].lip_cnt > 0); - - /* - * If this was the last reference to that node, it's license expires - * For now only IPv4 addresses are supported. - */ - if (--licensed_nodes[nid].lip_cnt == 0) { - licensed_nodes[nid].lip_ipv4 = (in_addr_t)0; - } -} diff --git a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.h b/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.h deleted file mode 100644 index 50583ab1973d..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _MDMN_SUBR_H -#define _MDMN_SUBR_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This is the structure for the wakeup table for the initiator's side. - * We need a transportation handle in order to wake up the process waiting - * for the rpc call to complete - */ -typedef struct mdmn_wti { - md_mn_msgid_t wti_id; - mutex_t wti_mx; - time_t wti_time; /* for timeout purpose */ - SVCXPRT *wti_transp; - md_mn_result_t *wti_result; - char *wti_args; -} mdmn_wti_t; - -extern mdmn_wti_t initiator_table[MD_MAXSETS][MD_MN_NCLASSES]; - - -/* - * This is the structure for the wakeup table for the master. - * We need the ID for checking purpose, synchronizing cv's and a place to store - * a pointer to the results so the master can take over from here. - */ -typedef struct mdmn_wtm { - md_mn_msgid_t wtm_id; - mutex_t wtm_mx; - cond_t wtm_cv; - md_mn_nodeid_t wtm_addr; - md_mn_result_t *wtm_result; -} mdmn_wtm_t; - -extern mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES]; - - -/* - * This structure is only needed because we start a thread and we want to - * pass more than one argument to that thread. - * So we pack all the args into one structure and pass a pointer to it. - */ -typedef struct md_mn_msg_and_transp { - md_mn_msg_t *mat_msg; - SVCXPRT *mat_transp; -} md_mn_msg_and_transp_t; - -#define MAX_SUBMESSAGES 8 - -#define MAX_OUTERR 1024 - -/* - * This is the message completion entry structure that stores the result - * for one message incore in an array and on disk - * Each entry is identified by the msgid being part of the result structure. - * The actual data needs to be stored in a separate pre-allocated field - * because the result structure only contains a pointer to stdout / stderr. - * mce_flags is set to: - * MDMN_MCT_IN_PROGRESS - if a message is currently being handled and - * no new message handler should be issued. - * MDMN_MCT_DONE - if the message is completely processed and - * the result is available - */ -typedef struct md_mn_mce { - md_mn_result_t mce_result; - char mce_data[MAX_OUTERR]; - uint_t mce_flags; -} md_mn_mce_t; - -/* - * We need to be able to store one result per each class and for each - * possible submessage. - * This makes our Message Completion Table mct for one diskset. - */ -typedef struct md_mn_mct { - md_mn_mce_t mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES]; -} md_mn_mct_t; - -extern md_mn_mct_t *mct[]; -extern int mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, - uint_t flag); -extern int mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result); - -/* here we find the MCT files on disk */ -#define MD_MN_MSG_COMP_TABLE "/var/run/mct" - -/* the return values for mdmn_mark_completion and mdmn_check_completion */ -#define MDMN_MCT_NOT_DONE 0x0001 -#define MDMN_MCT_DONE 0x0002 -#define MDMN_MCT_ERROR 0x0004 -#define MDMN_MCT_IN_PROGRESS 0x0008 - -/* the different states for md_mn_set_inited[] */ -#define MDMN_SET_MUTEXES 0x0001 -#define MDMN_SET_NODES 0x0002 -#define MDMN_SET_MCT 0x0004 -#define MDMN_SET_READY (MDMN_SET_MUTEXES | MDMN_SET_NODES | \ - MDMN_SET_MCT) - -/* the different states of mdmn_busy[set][class] */ -#define MDMN_BUSY 0x0001 -#define MDMN_LOCKED 0x0002 -#define MDMN_SUSPEND_1 0x0004 -#define MDMN_SUSPEND_ALL 0x0008 - - -extern mutex_t mdmn_busy_mutex[]; -extern cond_t mdmn_busy_cv[]; -extern struct md_set_desc *set_descriptor[]; - - -/* Stuff for licensing / checking ip adresses */ -typedef struct licensed_ip { - union { - in_addr_t u_lip_ipv4; /* a licensed ipv4 adress */ - in6_addr_t u_lip_ipv6; /* a licensed ipv6 adress */ - } lip_u; - sa_family_t lip_family; /* indicator for IPv4/IPv6 */ - int lip_cnt; /* it's reference count */ -} licensed_ip_t; - -#define lip_ipv4 lip_u.u_lip_ipv4 -#define lip_ipv6 lip_u.u_lip_ipv6 - -extern licensed_ip_t licensed_nodes[]; - -extern bool_t check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid); -extern void add_license(md_mnnode_desc *node); -extern void rem_license(md_mnnode_desc *node); - - -/* needful things */ - -extern bool_t mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class); -extern bool_t mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class); -extern void mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class); - -extern bool_t mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class); -extern void mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class); -extern void mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class); - -extern bool_t mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class); -extern int mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, - uint_t susptype); -extern void mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, - uint_t susptype); - -extern void commd_debug(uint_t debug_class, const char *message, ...); -extern void dump_result(uint_t dbc, char *prefix, md_mn_result_t *res); - - - -/* routines for handling the wakeup table for the master (master_table) */ -extern void mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class, - md_mn_result_t *res); -extern void mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, - md_mn_msgid_t *id); -extern void mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, - md_mn_msgid_t *id); -extern cond_t *mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class); -extern mutex_t *mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class); -extern md_mn_result_t *mdmn_get_master_table_res(set_t setno, - md_mn_msgclass_t class); -extern void mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class, - md_mn_nodeid_t nid); -extern md_mn_nodeid_t mdmn_get_master_table_addr(set_t setno, - md_mn_msgclass_t class); - - -/* routines for handling the wakeup table for the initiator (initiator_table) */ -extern void mdmn_register_initiator_table(set_t setno, - md_mn_msgclass_t class, md_mn_msg_t *msg, SVCXPRT *transp); -extern void mdmn_unregister_initiator_table(set_t setno, - md_mn_msgclass_t class); -extern int mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class); -extern void mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class, - md_mn_msgid_t *id); -extern SVCXPRT *mdmn_get_initiator_table_transp(set_t setno, - md_mn_msgclass_t class); -extern char *mdmn_get_initiator_table_args(set_t setno, - md_mn_msgclass_t class); -extern cond_t *mdmn_get_initiator_table_cv(set_t setno, - md_mn_msgclass_t class); -extern mutex_t *mdmn_get_initiator_table_mx(set_t setno, - md_mn_msgclass_t class); -extern time_t mdmn_get_initiator_table_time(set_t setno, - md_mn_msgclass_t class); - -/* the change log interface */ -extern int mdmn_log_msg(md_mn_msg_t *); -extern int mdmn_flag_msg(md_mn_msg_t *, uint_t); -extern int mdmn_unlog_msg(md_mn_msg_t *); - -#ifdef __cplusplus -} -#endif - -#endif /* _MDMN_SUBR_H */ diff --git a/usr/src/cmd/lvm/rpc.mdcommd/sparc/Makefile b/usr/src/cmd/lvm/rpc.mdcommd/sparc/Makefile deleted file mode 100644 index 45efea82a5af..000000000000 --- a/usr/src/cmd/lvm/rpc.mdcommd/sparc/Makefile +++ /dev/null @@ -1,105 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -COMMD = rpc.mdcommd -MDDOORS = mddoors - -PROG = $(COMMD) $(MDDOORS) - -DERIVED_FILES = mdmn_commd_xdr.c - -OBJECTS = mdmn_commd_server.o \ - mdmn_commd_service.o \ - mddoors.o - -COMMD_OBJECTS = mdmn_commd_server.o \ - mdmn_commd_service.o \ - mdmn_subr.o \ - mdmn_commd_xdr.o - -MDDOORS_OBJECTS = mddoors.o - -LINTOBJECTS = mdmn_commd_server.o mdmn_commd_service.o mdmn_subr.o mddoors.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -SRCS = $(OBJECTS:%.o=../%.c) -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -ROOTLIBLVM = $(ROOTLIB)/lvm - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta - -CFLAGS += $(DEFINES) -# -# -lint := LINTFLAGS += -m - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(COMMD): $(DERIVED_FILES) $(COMMD_OBJECTS) - $(LINK.c) -o $@ $(COMMD_OBJECTS) $(LDLIBS) -lnsl - $(POST_PROCESS) - -$(MDDOORS): ../mddoors.c - $(COMPILE.c) ../mddoors.c - $(LINK.c) -o $@ $(MDDOORS_OBJECTS) $(LDLIBS) - $(POST_PROCESS) - -INSTPROGS = $(ROOTUSRSBIN)/$(COMMD) $(ROOTLIBLVM)/$(MDDOORS) -install: all $(ROOTLIBLVM) $(INSTPROGS) - -cstyle: - $(CSTYLE) $(SRCS) - -lint: - for f in $(LINTSRCS) ; do \ - $(LINT.c) $(LINTFLAGS) $$f ; \ - done - -clean: - $(RM) $(OBJECTS) $(DERIVED_FILES) *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -$(DERIVED_FILES): $(SRC)/uts/common/sys/lvm/mdmn_commd.x - $(RPCGEN) -c $(SRC)/uts/common/sys/lvm/mdmn_commd.x -o $@ - -$(ROOTLIBLVM)/%: % - $(INS.file) - -$(ROOTLIBLVM): - $(INS.dir) - diff --git a/usr/src/cmd/lvm/rpc.metad/Makefile b/usr/src/cmd/lvm/rpc.metad/Makefile deleted file mode 100644 index d446ec246d9b..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/Makefile +++ /dev/null @@ -1,74 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile for logical volume management -# -# cmd/lvm/rpc.metad/Makefile - -PROG= rpc.metad -MANIFEST= meta.xml - -include ../../Makefile.cmd -include ../Makefile.lvm - -ROOTMANIFESTDIR= $(ROOTSVCNETWORKRPC) - -SUBDIRS= $(MACH) - -SRCS= metad_freeresult.c metad_init.c metad_svc_subr.c - -POFILES= $(SRCS:%.c=%.po) -POFILE= rpc.metadp.po - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all: $(SUBDIRS) - -catalog: $(POFILE) - -$(POFILE): $(POFILES) - $(RM) $@ - cat $(POFILES) > $(POFILE) - -CLOBBERFILES += metad_svc.c - -clean clobber lint: $(SUBDIRS) - -install: $(SUBDIRS) $(ROOTMANIFEST) - -check: $(CHKMANIFEST) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ diff --git a/usr/src/cmd/lvm/rpc.metad/i386/Makefile b/usr/src/cmd/lvm/rpc.metad/i386/Makefile deleted file mode 100644 index 9284ccc58c03..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/i386/Makefile +++ /dev/null @@ -1,95 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile for logical volume management -# - -PROG= rpc.metad - -DERIVED_FILES = \ - metad_svc.c - -OBJECTS= metad_freeresult.o \ - metad_init.o \ - metad_svc_subr.o - -LINTOBJECTS= metad_freeresult.o \ - metad_init.o \ - metad_svc_subr.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -SRCS = $(OBJECTS:%.o=../%.c) - -POFILES= $(OBJS:%.o=%.po) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta -lsocket -lnsl -ldevid - -CFLAGS += $(DEFINES) -# -# -lint := LINTFLAGS += -m - -%_svc.c := RPCGENFLAGS += -K -1 - - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJECTS) - $(LINK.c) -o $@ $(OBJECTS) $(LDLIBS) - $(POST_PROCESS) - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -catalog: - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${LINTSRCS} - -clean: - ${RM} ${OBJS} ${DERIVED_FILES} *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -metad_svc.c: $(SRC)/head/metad.x - $(CP) $(SRC)/head/metad.x . - $(RPCGEN) $(RPCGENFLAGS_SERVER) -DDEBUG metad.x -o $@ - ${RM} metad.x diff --git a/usr/src/cmd/lvm/rpc.metad/meta.xml b/usr/src/cmd/lvm/rpc.metad/meta.xml deleted file mode 100644 index 83840692a22d..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/meta.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/rpc.metad/metad_freeresult.c b/usr/src/cmd/lvm/rpc.metad/metad_freeresult.c deleted file mode 100644 index d7a46048959b..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/metad_freeresult.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 1993-2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "metad_local.h" - -/*ARGSUSED*/ -bool_t -metad_1_freeresult(SVCXPRT *unused, xdrproc_t xdr_result, caddr_t result) -{ - xdr_free(xdr_result, result); - return (TRUE); -} - -/*ARGSUSED*/ -bool_t -metad_2_freeresult(SVCXPRT *unused, xdrproc_t xdr_result, caddr_t result) -{ - xdr_free(xdr_result, result); - return (TRUE); -} diff --git a/usr/src/cmd/lvm/rpc.metad/metad_init.c b/usr/src/cmd/lvm/rpc.metad/metad_init.c deleted file mode 100644 index 480946e30701..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/metad_init.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "metad_local.h" -#include - -#include -#include -#include -#include -#include -#include - -extern void nc_perror(const char *msg); - -/*ARGSUSED*/ -void -sigalarmhandler(int sig) -{ - md_exit(NULL, 0); -} - - -/* - * check for trusted host and user - */ -static int -check_host( - struct svc_req *rqstp /* RPC stuff */ -) -{ - struct authsys_parms *sys_credp; - SVCXPRT *transp = rqstp->rq_xprt; - struct netconfig *nconfp = NULL; - struct nd_hostservlist *hservlistp = NULL; - int i; - int rval = -1; - char *inplace = NULL; - - /* check for root */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - assert(sys_credp != NULL); - if (sys_credp->aup_uid != 0) - goto out; - - /* get hostnames */ - if (transp->xp_netid == NULL) { - md_eprintf("transp->xp_netid == NULL\n"); - goto out; - } - if ((nconfp = getnetconfigent(transp->xp_netid)) == NULL) { -#ifdef DEBUG - nc_perror("getnetconfigent(transp->xp_netid)"); -#endif - goto out; - } - if ((__netdir_getbyaddr_nosrv(nconfp, &hservlistp, &transp->xp_rtaddr) - != 0) || (hservlistp == NULL)) { -#ifdef DEBUG - netdir_perror("netdir_getbyaddr(transp->xp_rtaddr)"); -#endif - goto out; - } - - /* check hostnames */ - for (i = 0; (i < hservlistp->h_cnt); ++i) { - struct nd_hostserv *hservp = &hservlistp->h_hostservs[i]; - char *hostname = hservp->h_host; - - inplace = strdup(hostname); - - /* localhost is OK */ - if (strcmp(hostname, mynode()) == 0) { - rval = 0; - goto out; - } - - /* check for remote root access */ - if (ruserok(hostname, 1, "root", "root") == 0) { - rval = 0; - goto out; - } - - sdssc_cm_nm2nid(inplace); - if (strcmp(inplace, hostname)) { - - /* - * If the names are now different it indicates - * that hostname was converted to a nodeid. This - * will only occur if hostname is part of the same - * cluster that the current node is in. - * If the machine is not running in a cluster than - * sdssc_cm_nm2nid is a noop which leaves inplace - * alone. - */ - rval = 0; - goto out; - } - } - - /* cleanup, return success */ -out: - if (inplace) - free(inplace); - if (hservlistp != NULL) - netdir_free(hservlistp, ND_HOSTSERVLIST); - if (nconfp != NULL) - Free(nconfp); - return (rval); -} - -/* - * check for user in local group 14 - */ -static int -check_gid14( - uid_t uid -) -{ - struct passwd *pwp; - struct group *grp; - char **namep; - - /* get user info, check default GID */ - if ((pwp = getpwuid(uid)) == NULL) - return (-1); - if (pwp->pw_gid == METAD_GID) - return (0); - - /* check in group */ - if ((grp = getgrgid(METAD_GID)) == NULL) - return (-1); - for (namep = grp->gr_mem; ((*namep != NULL) && (**namep != '\0')); - ++namep) { - if (strcmp(*namep, pwp->pw_name) == 0) - return (0); - } - return (-1); -} - -/* - * check AUTH_SYS - */ -static int -check_sys( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - md_error_t *ep /* returned status */ -) -{ - static mutex_t mx = DEFAULTMUTEX; - struct authsys_parms *sys_credp; - - /* for read, anything is OK */ - if (! (amode & W_OK)) - return (0); - - /* single thread (not really needed if daemon stays single threaded) */ - (void) mutex_lock(&mx); - - /* check for remote root or METAD_GID */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - if ((check_gid14(sys_credp->aup_uid) == 0) || - (check_host(rqstp) == 0)) { - (void) mutex_unlock(&mx); - return (0); - } - - /* return failure */ - (void) mutex_unlock(&mx); - return (mdsyserror(ep, EACCES, "rpc.metad")); -} - -/* - * setup RPC service - * - * if can't authenticate return < 0 - * any other error return > 0 - */ -int -svc_init( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - md_error_t *ep /* returned status */ -) -{ - SVCXPRT *transp; - - if (sdssc_bind_library() == SDSSC_ERROR) { - (void) mdsyserror(ep, EACCES, "can't bind to cluster library"); - return (1); - } - - /* - * if we have no rpc service info, we must have been - * called recursively from within the daemon - */ - if (rqstp == NULL) { - mdclrerror(ep); - return (0); /* OK */ - } - - /* - * initialize - */ - transp = rqstp->rq_xprt; - assert(transp != NULL); - *ep = mdnullerror; - - /* - * check credentials - */ - switch (rqstp->rq_cred.oa_flavor) { - - /* UNIX flavor */ - case AUTH_SYS: - { - if (check_sys(rqstp, amode, ep) != 0) - return (1); /* error */ - break; - } - - /* can't authenticate anything else */ - default: - svcerr_weakauth(transp); - return (-1); /* weak authentication */ - } - - /* - * (re)initialize - */ - if (md_init_daemon("rpc.metad", ep) != 0) - return (1); /* error */ - - if (set_snarf(ep)) - return (1); - - sr_validate(); - - /* success */ - return (0); -} - -/*ARGSUSED*/ -int -svc_fini(md_error_t *ep) -{ - return (0); -} - -int -check_set_lock( - int amode, /* R_OK | W_OK */ - md_setkey_t *cl_sk, /* clients idea of set locked */ - md_error_t *ep /* returned status */ -) -{ - md_setkey_t *svc_sk; - - if (cl_sk == NULL) - return (0); - - svc_sk = svc_get_setkey(cl_sk->sk_setno); - - /* The set is not locked */ - if (svc_sk == NULL) { - if ((amode & W_OK) == W_OK) { - (void) mddserror(ep, MDE_DS_WRITEWITHSULK, - cl_sk->sk_setno, mynode(), NULL, cl_sk->sk_setname); - return (1); - } - return (0); - } - - /* The set is locked, do we have the key? */ - if (cl_sk->sk_key.tv_sec == svc_sk->sk_key.tv_sec && - cl_sk->sk_key.tv_usec == svc_sk->sk_key.tv_usec) - return (0); - - (void) mddserror(ep, MDE_DS_SETLOCKED, MD_SET_BAD, mynode(), - svc_sk->sk_host, svc_sk->sk_setname); - - return (1); -} diff --git a/usr/src/cmd/lvm/rpc.metad/metad_local.h b/usr/src/cmd/lvm/rpc.metad/metad_local.h deleted file mode 100644 index a6883fab8ec3..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/metad_local.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1993, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#ifndef _METAD_LOCAL_H -#define _METAD_LOCAL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* metad_svc.c */ -extern struct timeval gc; - -/* metad_svc_subr.c */ -extern md_setkey_t *svc_get_setkey(set_t setno); -extern void svc_set_setkey(md_setkey_t *svc_sl); - -/* metad_init.c */ -extern int svc_init(struct svc_req *rqstp, int amode, - md_error_t *ep); -extern void sigalarmhandler(int sig); -extern int svc_fini(md_error_t *ep); -extern int check_set_lock(int amode, md_setkey_t *cl_sk, - md_error_t *ep); - -#ifdef __cplusplus -} -#endif - -#endif /* _METAD_LOCAL_H */ diff --git a/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c b/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c deleted file mode 100644 index 71b6904594dd..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c +++ /dev/null @@ -1,5596 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "metad_local.h" -#include -#include -#include -#include -#include - -#include -#include -#include - -#define MDDOORS "/usr/lib/lvm/mddoors" - -/* - * rpc.metad daemon - * - * The rpc.metad deamon supports two versions of the svm rpc calls - version 1 - * and version 2. The over-the-wire structures sent as part of these rpc calls - * are also versioned - version 1 and version 2 exist. It must be noted that - * the version 2 structures have sub-versions or revisions as well. The - * revisions in the version 2 structures allow for flexiblility in changing - * over the wire structures without creating a new version of the svm rpc - * calls. No changes may be made to the version 1 routines or structures used - * by these routines. - * - * If, for example, the version 2 mdrpc_devinfo_args over the wire structure - * (mdrpc_devinfo_2_args*) is changed then the structure change must be - * accompanied by the following: - * - * Header file changes: - * . May need to introduce a new structure revision MD_METAD_ARGS_REV_X, where - * X is the revision number. - * . Create mdrpc_devinfo_2_args_rX, where X is the new revision of the - * structure. - * . Add a switch statement in mdrpc_devinfo_2_args. - * - * rpc.metad changes: - * . Check for the structure revision in the appropriate mdrpc_devinfo_svc - * routine (mdrpc_devinfo_2_svc). - * - * libmeta changes: - * . In the libmeta code that makes the mdrpc_devinfo rpc call, the arguments - * being passed as part of this call (namely mdrpc_devinfo_Y_args) must have - * the revision field and associated structure populated correctly. - */ - -static md_setkey_t *my_svc_sk = NULL; - -/* - * Add namespace entry to local mddb for using given sideno, key - * and names. - */ -static int -add_sideno_sidenm( - mdsidenames_t *sidenms, - mdkey_t local_key, - side_t sideno, - md_set_desc *sd, /* Only used with Version 2 */ - md_error_t *ep -) -{ - mdsidenames_t *sn; - mdsetname_t *local_sp; - char *nm; - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) - return (-1); - - for (sn = sidenms; sn != NULL; sn = sn->next) - if (sn->sideno == sideno) - break; - - assert(sn != NULL); - - - /* - * SKEW will be used on the traditional diskset despite of the - * rpc version. SKEW is not used on the multinode diskset - */ - if (MD_MNSET_DESC(sd)) { - nm = meta_getnmbykey(MD_LOCAL_SET, sideno, local_key, ep); - } else { - nm = meta_getnmbykey(MD_LOCAL_SET, sideno+SKEW, local_key, ep); - } - - if (nm == NULL) { - if (! mdisok(ep)) { - if (! mdissyserror(ep, ENOENT)) - return (-1); - mdclrerror(ep); - } - - /* - * Ignore returned key from add_name, only care about errs - * - * SKEW is used for a regular diskset since sideno could - * have a value of 0 in that diskset type. add_name is - * writing to the local mddb and a sideno of 0 in the - * local mddb is reserved for non-diskset names. - * SKEW is added to the sideno in the local mddb so that - * the sideno for the diskset will never be 0. - * - * In a MNdiskset, the sideno will never be 0 (by design). - * So, no SKEW is needed when writing to the local mddb. - */ - if (MD_MNSET_DESC(sd)) { - if (add_name(local_sp, sideno, local_key, - sn->dname, sn->mnum, sn->cname, NULL, NULL, - ep) == -1) - return (-1); - } else { - if (add_name(local_sp, sideno+SKEW, local_key, - sn->dname, sn->mnum, sn->cname, NULL, NULL, - ep) == -1) - return (-1); - } - } else - Free(nm); - - return (0); -} - -/* - * Delete sidename entry from local set using key and sideno. - */ -static int -del_sideno_sidenm( - mdkey_t sidekey, - side_t sideno, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) - return (-1); - - if (del_name(local_sp, sideno, sidekey, ep) == -1) - mdclrerror(ep); /* ignore errs */ - - return (0); -} - - -/* - * Add namespace entries to local mddb for drives in drive list in - * set descriptor. - * - * If a MNset and if this host is being added to the set (this host - * is in the node_v list), add a namespace entry for the name of - * each drive using this host's sideno. - * - * If not a MNset, add namespace entries for all the new hosts being - * added to this set (list in node_v). - */ -static void -add_drv_sidenms( - char *hostname, - mdsetname_t *sp, - md_set_desc *sd, - int node_c, - char **node_v, - md_error_t *ep -) -{ - mdsetname_t *my_sp; - md_drive_desc *dd, *my_dd, *p, *q; - mddrivename_t *dn, *my_dn; - int i; - side_t sideno = 0, mysideno = 0; - ddi_devid_t devid_remote = NULL; - ddi_devid_t devid_local = NULL; - int devid_same = -1; - int using_devid = 0; - md_mnnode_desc *nd; - - assert(sd->sd_drvs != NULL); - dd = sd->sd_drvs; - - if (dd->dd_dnp == NULL) - return; - - if ((my_sp = metasetname(sp->setname, ep)) == NULL) - return; - metaflushsetname(my_sp); - - /* If a MN diskset */ - if (MD_MNSET_DESC(sd)) { - /* Find sideno associated with RPC client. */ - nd = sd->sd_nodelist; - while (nd) { - - if (strcmp(nd->nd_nodename, hostname) == 0) { - sideno = nd->nd_nodeid; - } - - /* While looping, find my side num as well */ - if (strcmp(nd->nd_nodename, mynode()) == 0) { - mysideno = nd->nd_nodeid; - } - - if ((sideno) && (mysideno)) { - break; - } - nd = nd->nd_next; - } - - if (!sideno) { - (void) mddserror(ep, MDE_DS_HOSTNOSIDE, - sp->setno, hostname, NULL, sp->setname); - return; - } - } else { - /* - * if not a MN diskset - * do action for traditional diskset. - * despite of the rpc version - */ - for (sideno = 0; sideno < MD_MAXSIDES; sideno++) { - /* Skip empty slots */ - if (sd->sd_nodes[sideno][0] == '\0') - continue; - - if (strcmp(hostname, sd->sd_nodes[sideno]) == 0) - break; - } - - if (sideno == MD_MAXSIDES) { - (void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, - hostname, NULL, sp->setname); - return; - } - } - if ((my_dd = metaget_drivedesc_sideno(my_sp, sideno, MD_BASICNAME_OK, - ep)) == NULL) { - if (! mdisok(ep)) - return; - /* we are supposed to have drives!!!! */ - assert(0); - } - - /* - * The system is either all devid or all - * non-devid so we look at the first item - * in the list to determine if we're using devids or not. - * We also check to make sure it's not a multi-node diskset. - * If it is, we don't use devid's. - * - * For did disks, the dd_dnp->devid is a valid pointer which - * points to a '' string of devid. We need to check this - * before set the using_devid. - */ - if ((dd->dd_dnp->devid != NULL) && (dd->dd_dnp->devid[0] != '\0') && - (!(MD_MNSET_DESC(sd)))) - using_devid = 1; - - /* - * We have to match-up the dd that were passed - * across the wire to the dd we have in this daemon. - * That way we can pick up the new sidenames that were - * passed to us and match them up with the local namespace key. - * Only we have the key, this cannot be passed in. - */ - for (p = dd; p != NULL; p = p->dd_next) { - dn = p->dd_dnp; - devid_remote = NULL; - - if (dn->devid != NULL && (strlen(dn->devid) != 0) && - using_devid) { - /* - * We have a devid so use it - */ - (void) devid_str_decode(dn->devid, &devid_remote, NULL); - } - - /* check to make sure using_devid agrees with reality... */ - if ((using_devid == 1) && (devid_remote == NULL)) { - /* something went really wrong. Can't process */ - (void) mddserror(ep, MDE_DS_INVALIDDEVID, sp->setno, - hostname, dn->cname, sp->setname); - return; - } - - for (q = my_dd; q != NULL; q = q->dd_next) { - my_dn = q->dd_dnp; - devid_same = -1; - - if (my_dn->devid != NULL && using_devid) { - if (devid_str_decode(my_dn->devid, - &devid_local, NULL) == 0) { - devid_same = devid_compare(devid_remote, - devid_local); - devid_free(devid_local); - } - } - - if (using_devid && devid_same == 0) { - break; - } - - if (!using_devid && - strcmp(my_dn->cname, dn->cname) == 0) - break; - } - - if (devid_remote) { - devid_free(devid_remote); - } - assert(q != NULL); - assert(my_dn->side_names_key != MD_KEYWILD); - - if (MD_MNSET_DESC(sd)) { - /* - * Add the side names to the local db - * for this node only. - */ - if (add_sideno_sidenm(dn->side_names, - my_dn->side_names_key, mysideno, sd, ep)) - return; - /* - * Sidenames for this drive were added - * to this host during the routine adddrvs. - * The sidenames that were added are the - * names associated with this drive on - * each of the hosts that were previously - * in the set. - * When the sidename for this drive on - * this host is added, the sidename - * from the host executing the command - * (not this host) is sent to this host. - * This host finds the originating host's - * sidename and can then determine this - * host's sidename. - * The sidenames from the other hosts serve - * only as temporary sidenames until this - * host's sidename can be added. - * In order to conserve space in the - * local mddb, the code now deletes the - * temporary sidenames added during adddrvs. - * When finished, only the sidename for this - * node should be left. - * Ignore any errors during this process since - * a failure to delete the extraneous - * sidenames shouldn't cause this routine - * to fail (in case that sidename didn't exist). - */ - nd = sd->sd_nodelist; - while (nd) { - if (nd->nd_nodeid != mysideno) { - if (del_sideno_sidenm( - dn->side_names_key, - nd->nd_nodeid, ep) == -1) - mdclrerror(ep); - } - nd = nd->nd_next; - } - } else { - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip empty slots */ - if (sd->sd_nodes[i][0] == '\0') - continue; - - /* Skip nodes not being added */ - if (! strinlst(sd->sd_nodes[i], - node_c, node_v)) - continue; - - /* Add the per side names to local db */ - if (add_sideno_sidenm(dn->side_names, - my_dn->side_names_key, i, sd, ep)) - return; - } - } - } -} - -/* ARGSUSED */ -bool_t -mdrpc_flush_internal_common(mdrpc_null_args *args, mdrpc_generic_res *res, - struct svc_req *rqstp) -{ - md_error_t *ep = &res->status; - int err, op_mode = W_OK; - - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - metaflushnames(1); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_flush_internal_1_svc(mdrpc_null_args *args, mdrpc_generic_res *res, - struct svc_req *rqstp) -{ - return (mdrpc_flush_internal_common(args, res, rqstp)); -} - -bool_t -mdrpc_flush_internal_2_svc(mdrpc_null_args *args, mdrpc_generic_res *res, - struct svc_req *rqstp) -{ - return (mdrpc_flush_internal_common(args, res, rqstp)); -} - -/* - * add 1 or more namespace entries per drive record. - * (into the local namespace) - */ -bool_t -mdrpc_add_drv_sidenms_common( - mdrpc_drv_sidenm_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - add_drv_sidenms(args->hostname, args->sp, args->sd, - args->node_v.node_v_len, args->node_v.node_v_val, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_add_drv_sidenms_1_svc( - mdrpc_drv_sidenm_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_drv_sidenm_2_args_r1 v2_args; - int i, j; - - /* allocate memory */ - v2_args.sd = Zalloc(sizeof (md_set_desc)); - alloc_newdrvdesc(args->sd->sd_drvs, &v2_args.sd->sd_drvs); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.hostname = args->hostname; - v2_args.cl_sk = args->cl_sk; - v2_args.sp = args->sp; - /* set descriptor */ - v2_args.sd->sd_ctime = args->sd->sd_ctime; - v2_args.sd->sd_genid = args->sd->sd_genid; - v2_args.sd->sd_setno = args->sd->sd_setno; - v2_args.sd->sd_flags = args->sd->sd_flags; - for (i = 0; i < MD_MAXSIDES; i++) { - v2_args.sd->sd_isown[i] = args->sd->sd_isown[i]; - - for (j = 0; j < MD_MAX_NODENAME_PLUS_1; j++) - v2_args.sd->sd_nodes[i][j] = - args->sd->sd_nodes[i][j]; - } - v2_args.sd->sd_med = args->sd->sd_med; - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvdesc_old2new(args->sd->sd_drvs, v2_args.sd->sd_drvs); - v2_args.node_v.node_v_len = args->node_v.node_v_len; - v2_args.node_v.node_v_val = args->node_v.node_v_val; - - retval = mdrpc_add_drv_sidenms_common(&v2_args, res, rqstp); - - free(v2_args.sd); - free_newdrvdesc(v2_args.sd->sd_drvs); - - return (retval); -} - -bool_t -mdrpc_add_drv_sidenms_2_svc( - mdrpc_drv_sidenm_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_add_drv_sidenms_common( - &args->mdrpc_drv_sidenm_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static int -add_sidenamelist( - mddrivename_t *dn, - side_t thisside, - md_set_record *sr, /* used by RPC version 2 */ - md_error_t *ep -) -{ - mdsidenames_t *sn; - mdkey_t key; - int err; - mdsetname_t *local_sp; - md_mnset_record *mnsr; - md_mnnode_record *nr; - uint_t nodeid = 0; - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) - return (-1); - - key = MD_KEYWILD; - - /* - * If a multi-node diskset, find nodeid associated with this node. - */ - if (MD_MNSET_REC(sr)) { - mnsr = (struct md_mnset_record *)sr; - nr = mnsr->sr_nodechain; - while (nr) { - if (strcmp(nr->nr_nodename, mynode()) == 0) { - break; - } - nr = nr->nr_next; - } - /* - * If node is found, then a new drive is being added to - * a MN set of which this node is a member. - * If node is not found, then this host is being added to - * a MN set that has drives associated with it. - */ - if (nr) - nodeid = nr->nr_nodeid; - } - for (sn = dn->side_names; sn != NULL; sn = sn->next) { - if (MD_MNSET_REC(sr)) { - /* - * In multi-node disksets, only add side information - * to the local mddb about this node. - * If the sideno for this node is found, then - * a new drive is being added to a MN set of - * which this node is a member. - * If the sideno for this node is not found, then - * this host is being added to a MNset that - * has drives associated with it. In this case, - * need to add the sidename associated with the - * rpc client, but since we don't know which node - * is the client, then add temp entries for all sides. - * Later, the sidename for this node will be set - * via add_drv_sidenms and then the temp - * sidenames can be removed. - */ - if (nodeid == sn->sideno) { - if ((err = add_name(local_sp, sn->sideno, key, - sn->dname, sn->mnum, sn->cname, - NULL, NULL, ep)) == -1) - return (-1); - key = (mdkey_t)err; - break; - } - } else { - /* - * When a sidename is added into the namespace the local - * side information for the name is added first of all. - * When the first sidename is created this causes the - * devid of the disk to be recorded in the namespace, if - * the non-local side information is added first then - * there is the possibility of getting the wrong devid - * because there is no guarantee that the dev_t (mnum in - * this instance) is the same across all the nodes in - * the set. So the only way to make sure that the - * correct dev_t is used is to force the adding in of - * the local sidename record first of all. This same - * issue affects add_key_name(). - */ - if (sn->sideno != thisside) - continue; - if ((err = add_name(local_sp, sn->sideno+SKEW, key, - sn->dname, sn->mnum, sn->cname, NULL, - NULL, ep)) == -1) - return (-1); - key = (mdkey_t)err; - break; - } - } - - /* - * Now the other sides for non-MN set - */ - if (!MD_MNSET_REC(sr)) { - for (sn = dn->side_names; sn != NULL; sn = sn->next) { - if (sn->sideno == thisside) - continue; - if ((err = add_name(local_sp, sn->sideno+SKEW, key, - sn->dname, sn->mnum, sn->cname, NULL, NULL, - ep)) == -1) - return (-1); - key = (mdkey_t)err; - } - } - - /* Temporarily add all sides. */ - if ((key == MD_KEYWILD) && (MD_MNSET_REC(sr))) { - for (sn = dn->side_names; sn != NULL; sn = sn->next) { - sn = dn->side_names; - if (sn) { - if ((err = add_name(local_sp, sn->sideno, key, - sn->dname, sn->mnum, sn->cname, - NULL, NULL, ep)) == -1) - return (-1); - key = (mdkey_t)err; - } - } - } - - dn->side_names_key = key; - return (0); -} - -/* - * imp_adddrvs - * This is a version of adddrvs that is specific to the - * metaimport command. Due to the unavailability of some disks, - * information needs to be obtained about the disk from the devid so - * it can eventually be passed down to add_sidenamelist. - * Go ahead and set drive state to MD_DR_OK here so that no - * later RPC is needed to set OK where UNRLSV_REPLICATED could - * be cleared. Set record is still set to MD_SR_ADD which will force - * a cleanup of the set in case of panic. - */ -void -imp_adddrvs( - char *setname, - md_drive_desc *dd, - md_timeval32_t timestamp, - ulong_t genid, - md_error_t *ep -) -{ - mddb_userreq_t req; - md_drive_record *dr, *tdr; - md_set_record *sr; - md_drive_desc *p; - mddrivename_t *dn; - mdname_t *np; - md_dev64_t dev; - md_error_t xep = mdnullerror; - char *minorname = NULL; - ddi_devid_t devidp = NULL; - mdsidenames_t *sn; - mdsetname_t *local_sp; - - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - return; - } - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - for (p = dd; p != NULL; p = p->dd_next) { - uint_t rep_slice; - int ret = 0; - - dn = p->dd_dnp; - - /* - * We need the minorname and devid string decoded from the - * devid to add the sidename for this drive to the - * local set. - */ - ret = devid_str_decode(dn->devid, &devidp, &minorname); - if (ret != 0) { - /* failed to decode the devid */ - goto out; - } - - sn = dn->side_names; - if (sn == NULL) { - dn->side_names_key = MD_KEYWILD; - continue; - } - - if ((dn->side_names_key = add_name(local_sp, SKEW, MD_KEYWILD, - sn->dname, sn->mnum, sn->cname, minorname, devidp, - ep)) == -1) { - devid_free(devidp); - devid_str_free(minorname); - goto out; - } - - devid_free(devidp); - devid_str_free(minorname); - - /* Create the drive record */ - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_DR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*dr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - goto out; - } - - /* Fill in the drive record values */ - dr = Zalloc(sizeof (*dr)); - dr->dr_selfid = req.ur_recid; - dr->dr_dbcnt = p->dd_dbcnt; - dr->dr_dbsize = p->dd_dbsize; - dr->dr_key = dn->side_names_key; - - dr->dr_ctime = timestamp; - dr->dr_genid = genid; - dr->dr_revision = MD_DRIVE_RECORD_REVISION; - dr->dr_flags = MD_DR_OK; - if (p->dd_flags & MD_DR_UNRSLV_REPLICATED) { - dr->dr_flags |= MD_DR_UNRSLV_REPLICATED; - sr->sr_flags |= MD_SR_UNRSLV_REPLICATED; - } - - /* Link the drive records and fill in in-core data */ - dr_cache_add(sr, dr); - - dev = NODEV64; - if ((meta_replicaslice(dn, &rep_slice, &xep) == 0) && - ((np = metaslicename(dn, rep_slice, &xep)) != NULL)) - dev = np->dev; - else - mdclrerror(&xep); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE, - MD_LOCAL_SET, dev); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE, - sr->sr_setno, dev); - } - - /* Commit all the records atomically */ - commitset(sr, TRUE, ep); - free_sr(sr); - return; - -out: - /* If failures, remove drive records. */ - dr = tdr = sr->sr_drivechain; - while (dr != NULL) { - tdr = dr->dr_next; - if (del_name(local_sp, 0, dr->dr_key, &xep)) - mdclrerror(&xep); - sr_del_drv(sr, dr->dr_selfid); - dr = tdr; - } -} - -static void -adddrvs( - char *setname, - md_drive_desc *dd, - md_timeval32_t timestamp, - ulong_t genid, - md_error_t *ep -) -{ - mddb_userreq_t req; - md_drive_record *dr; - md_set_record *sr; - md_drive_desc *p; - mddrivename_t *dn; - mdname_t *np; - md_dev64_t dev; - md_error_t xep = mdnullerror; - int i; - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - if (MD_MNSET_REC(sr)) - i = 0; - else { - /* get thisside */ - for (i = 0; i < MD_MAXSIDES; i++) { - if (sr->sr_nodes[i][0] == '\0') - continue; - if (strcmp(mynode(), sr->sr_nodes[i]) == 0) - break; - } - - if (i == MD_MAXSIDES) { - /* so find the first free slot! */ - for (i = 0; i < MD_MAXSIDES; i++) { - if (sr->sr_nodes[i][0] == '\0') - break; - } - } - } - - for (p = dd; p != NULL; p = p->dd_next) { - uint_t rep_slice; - - dn = p->dd_dnp; - - /* Add the per side names to the local db */ - if (add_sidenamelist(dn, (side_t)i, sr, ep)) { - free_sr(sr); - return; - } - - /* Create the drive record */ - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_DR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*dr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - free_sr(sr); - return; - } - - /* Fill in the drive record values */ - dr = Zalloc(sizeof (*dr)); - dr->dr_selfid = req.ur_recid; - dr->dr_dbcnt = p->dd_dbcnt; - dr->dr_dbsize = p->dd_dbsize; - dr->dr_key = dn->side_names_key; - - dr->dr_ctime = timestamp; - dr->dr_genid = genid; - dr->dr_revision = MD_DRIVE_RECORD_REVISION; - dr->dr_flags = MD_DR_ADD; - - /* Link the drive records and fill in in-core data */ - dr_cache_add(sr, dr); - - dev = NODEV64; - if ((meta_replicaslice(dn, &rep_slice, &xep) == 0) && - ((np = metaslicename(dn, rep_slice, &xep)) != NULL)) - dev = np->dev; - else - mdclrerror(&xep); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE, - MD_LOCAL_SET, dev); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE, - sr->sr_setno, dev); - } - - /* Commit all the records atomically */ - commitset(sr, TRUE, ep); - free_sr(sr); -} - -/* - * add 1 or more drive records to a set. - */ -bool_t -mdrpc_adddrvs_common( - mdrpc_drives_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - adddrvs(args->sp->setname, args->drivedescs, args->timestamp, - args->genid, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_adddrvs_1_svc( - mdrpc_drives_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_drives_2_args_r1 v2_args; - - /* allocate memory */ - alloc_newdrvdesc(args->drivedescs, &v2_args.drivedescs); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.cl_sk = args->cl_sk; - v2_args.sp = args->sp; - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvdesc_old2new(args->drivedescs, v2_args.drivedescs); - v2_args.timestamp = args->timestamp; - v2_args.genid = args->genid; - - retval = mdrpc_adddrvs_common(&v2_args, res, rqstp); - - free_newdrvdesc(v2_args.drivedescs); - - return (retval); -} - -bool_t -mdrpc_adddrvs_2_svc( - mdrpc_drives_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_adddrvs_common( - &args->mdrpc_drives_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * add 1 or more drive records to a set when importing. - */ -bool_t -mdrpc_imp_adddrvs_2_svc( - mdrpc_drives_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mdrpc_drives_2_args_r1 *v2_args; - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - v2_args = &args->mdrpc_drives_2_args_u.rev1; - if (v2_args == NULL) { - return (FALSE); - } - break; - default: - return (FALSE); - } - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, v2_args->cl_sk, ep)) - return (TRUE); - - /* doit */ - imp_adddrvs(v2_args->sp->setname, v2_args->drivedescs, - v2_args->timestamp, v2_args->genid, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -static void -addhosts( - char *setname, - int node_c, - char **node_v, - int version, /* RPC version of calling routine */ - md_error_t *ep -) -{ - mddb_userreq_t req; - md_set_record *sr; - int i, j; - md_mnset_record *mnsr; - md_mnnode_record *nr; - mddb_set_node_params_t snp; - int nodecnt; - mndiskset_membershiplist_t *nl, *nl2; - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - mnsr = (md_mnset_record *)sr; - /* - * Verify nodes are in membership list on THIS node. - * Initiating node has verified that nodes are in membership - * list on the initiating node. - * Get membershiplist from API routine. If there's - * an error, fail to add hosts and pass back error. - */ - if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { - free_sr(sr); - return; - } - /* Verify that all nodes are in member list */ - for (i = 0; i < node_c; i++) { - /* - * If node in list isn't a member of the membership, - * just return error. - */ - if (meta_is_member(node_v[i], NULL, nl) == 0) { - meta_free_nodelist(nl); - (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, - sr->sr_setno, node_v[i], NULL, setname); - free_sr(sr); - return; - } - } - } - - for (i = 0; i < node_c; i++) { - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - mnsr = (md_mnset_record *)sr; - /* Create the node record */ - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_NR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*nr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) - != 0) { - (void) mdstealerror(ep, &req.ur_mde); - meta_free_nodelist(nl); - free_sr(sr); - return; - } - - nr = Zalloc(sizeof (*nr)); - nr->nr_revision = MD_MNNODE_RECORD_REVISION; - nr->nr_selfid = req.ur_recid; - nr->nr_ctime = sr->sr_ctime; - nr->nr_genid = sr->sr_genid; - nr->nr_flags = MD_MN_NODE_ADD; - nl2 = nl; - while (nl2) { - if (strcmp(nl2->msl_node_name, node_v[i]) - == 0) { - nr->nr_nodeid = nl2->msl_node_id; - break; - } - nl2 = nl2->next; - } - - (void) strcpy(nr->nr_nodename, node_v[i]); - - /* - * When a node is added to a MN diskset, set the - * nodeid of this node in the md_set structure - * in the kernel. - */ - if (strcmp(nr->nr_nodename, mynode()) == 0) { - (void) memset(&snp, 0, sizeof (snp)); - snp.sn_nodeid = nr->nr_nodeid; - snp.sn_setno = mnsr->sr_setno; - if (metaioctl(MD_MN_SET_NODEID, &snp, - &snp.sn_mde, NULL) != 0) { - (void) mdstealerror(ep, &snp.sn_mde); - meta_free_nodelist(nl); - free_sr(sr); - return; - } - } - - /* Link the node records and fill in in-core data */ - mnnr_cache_add(mnsr, nr); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HOST, - mnsr->sr_setno, nr->nr_nodeid); - } else { - for (j = 0; j < MD_MAXSIDES; j++) { - if (sr->sr_nodes[j][0] != '\0') - continue; - (void) strcpy(sr->sr_nodes[j], node_v[i]); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, - SVM_TAG_HOST, sr->sr_setno, j); - break; - } - } - } - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - meta_free_nodelist(nl); - } - - (void) memset(&req, '\0', sizeof (req)); - - METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) - - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - req.ur_size = sizeof (*mnsr); - } else { - req.ur_size = sizeof (*sr); - } - req.ur_data = (uintptr_t)sr; - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - free_sr(sr); - return; - } - - commitset(sr, TRUE, ep); - - free_sr(sr); -} - -/* - * add 1 or more hosts to a set. - */ -bool_t -mdrpc_addhosts_common( - mdrpc_host_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp, /* RPC stuff */ - int version /* RPC version */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - addhosts(args->sp->setname, args->hosts.hosts_len, - args->hosts.hosts_val, version, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_addhosts_1_svc( - mdrpc_host_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - /* Pass RPC version (METAD_VERSION) to common routine */ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_addhosts_common(args, res, rqstp, METAD_VERSION)); -} - -bool_t -mdrpc_addhosts_2_svc( - mdrpc_host_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* Pass RPC version (METAD_VERSION_DEVID) to common routine */ - return (mdrpc_addhosts_common( - &args->mdrpc_host_2_args_u.rev1, res, - rqstp, METAD_VERSION_DEVID)); - default: - return (FALSE); - } -} - -static void -createset( - mdsetname_t *sp, - md_node_nm_arr_t nodes, - md_timeval32_t timestamp, - ulong_t genid, - md_error_t *ep -) -{ - mddb_userreq_t req; - md_set_record *sr; - int i; - - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_SR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*sr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - return; - } - - sr = Zalloc(sizeof (*sr)); - - sr->sr_selfid = req.ur_recid; - sr->sr_setno = sp->setno; - (void) strcpy(sr->sr_setname, sp->setname); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_SET, sp->setno, - NODEV64); - - (void) meta_smf_enable(META_SMF_DISKSET, NULL); - - for (i = 0; i < MD_MAXSIDES; i++) { - (void) strcpy(sr->sr_nodes[i], nodes[i]); - /* Skip empty slots */ - if (sr->sr_nodes[i][0] == '\0') - continue; - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HOST, sp->setno, - i); - } - - sr->sr_ctime = timestamp; - sr->sr_genid = genid; - sr->sr_revision = MD_SET_RECORD_REVISION; - sr->sr_flags |= MD_SR_ADD; - - sr->sr_mhiargs = defmhiargs; - - sr_cache_add(sr); - - commitset(sr, TRUE, ep); -} - -static void -mncreateset( - mdsetname_t *sp, - md_mnnode_desc *nodelist, - md_timeval32_t timestamp, - ulong_t genid, - md_node_nm_t master_nodenm, - int master_nodeid, - md_error_t *ep -) -{ - mddb_userreq_t req; - md_mnset_record *mnsr; - md_mnnode_record *nr; - md_mnnode_desc *nd; - mddb_set_node_params_t snp; - int nodecnt; - mndiskset_membershiplist_t *nl; - - /* - * Validate that nodes in set being created are in the - * membership list on THIS node. - * Initiating node has verified that nodes are in membership - * list on the initiating node. - * Get membershiplist from API routine. If there's - * an error, fail to add set and pass back error. - */ - if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { - return; - } - /* Verify that all nodes are in member list */ - nd = nodelist; - while (nd) { - /* - * If node in list isn't a member of the membership, - * just return error. - */ - if (meta_is_member(nd->nd_nodename, 0, nl) == 0) { - meta_free_nodelist(nl); - (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, - sp->setno, nd->nd_nodename, NULL, sp->setname); - return; - } - nd = nd->nd_next; - } - meta_free_nodelist(nl); - - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_SR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*mnsr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - return; - } - - mnsr = Zalloc(sizeof (*mnsr)); - mnsr->sr_selfid = req.ur_recid; - mnsr->sr_setno = sp->setno; - (void) strlcpy(mnsr->sr_setname, sp->setname, MD_MAX_SETNAME); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_SET, sp->setno, - NODEV64); - - (void) meta_smf_enable(META_SMF_DISKSET | META_SMF_MN_DISKSET, NULL); - - nd = nodelist; - while (nd) { - /* Create the node record */ - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_NR(MD_DB_CREATE, 0); - req.ur_size = sizeof (*nr); - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - /* Frees mnsr and any alloc'd node records */ - free_sr((struct md_set_record *)mnsr); - (void) mdstealerror(ep, &req.ur_mde); - return; - } - - nr = Zalloc(sizeof (*nr)); - nr->nr_revision = MD_MNNODE_RECORD_REVISION; - nr->nr_selfid = req.ur_recid; - nr->nr_ctime = timestamp; - nr->nr_genid = genid; - nr->nr_nodeid = nd->nd_nodeid; - nr->nr_flags = nd->nd_flags; - (void) strlcpy(nr->nr_nodename, nd->nd_nodename, - MD_MAX_NODENAME); - - /* Link the node records and fill in in-core data */ - mnnr_cache_add(mnsr, nr); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HOST, sp->setno, - nr->nr_nodeid); - - nd = nd->nd_next; - } - - /* - * For backward compatibility, fill in mynode name - * as the only name in the sr_nodes array. This - * allows the pre-MNdiskset code to see that there - * is a node in this diskset. This will keep the - * pre-MNdiskset code from removing this set. - */ - (void) strlcpy(mnsr->sr_nodes_bw_compat[0], mynode(), MD_MAX_NODENAME); - - mnsr->sr_ctime = timestamp; - mnsr->sr_genid = genid; - mnsr->sr_revision = MD_SET_RECORD_REVISION; - mnsr->sr_flags |= MD_SR_ADD; - - mnsr->sr_flags |= MD_SR_MN; - (void) strcpy(mnsr->sr_master_nodenm, master_nodenm); - mnsr->sr_master_nodeid = master_nodeid; - - mnsr->sr_mhiargs = defmhiargs; - - sr_cache_add((struct md_set_record *)mnsr); - - commitset((struct md_set_record *)mnsr, TRUE, ep); - - /* - * When a set is created for the first time, the nodelist - * will contain this node. - * When a node is just being added to a set, the nodelist - * will not contain this node. This node is added to the - * set structure with a later call to addhosts. - * - * So, if the nodelist contains an entry for this node - * then set the nodeid of this node in the md_set kernel - * data structure. - */ - nd = nodelist; - while (nd) { - if (strcmp(nd->nd_nodename, mynode()) == 0) { - break; - } - nd = nd->nd_next; - } - if (nd) { - (void) memset(&snp, 0, sizeof (snp)); - snp.sn_nodeid = nd->nd_nodeid; - snp.sn_setno = sp->setno; - if (metaioctl(MD_MN_SET_NODEID, &snp, &snp.sn_mde, NULL) != 0) { - (void) mdstealerror(ep, &snp.sn_mde); - return; - } - } -} - -/* - * create a set on a host - */ -bool_t -mdrpc_createset_common( - mdrpc_createset_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - char stringbuf1[MAXPATHLEN]; - char stringbuf2[MAXPATHLEN]; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* create the arguments for the symlink() and unlink() calls */ - (void) snprintf(stringbuf2, sizeof (stringbuf2), "/dev/md/%s", - args->sp->setname); - (void) snprintf(stringbuf1, sizeof (stringbuf1), "shared/%d", - args->sp->setno); - - /* - * Since we already verified that the setname was OK, make sure to - * cleanup before proceeding. - */ - if (unlink(stringbuf2) == -1) { - if (errno != ENOENT) { - (void) mdsyserror(ep, errno, stringbuf2); - return (TRUE); - } - } - - /* create the set */ - createset(args->sp, args->nodes, args->timestamp, args->genid, ep); - - if (! mdisok(ep)) - return (TRUE); - - /* create the symlink */ - if (symlink(stringbuf1, stringbuf2) == -1) - (void) mdsyserror(ep, errno, stringbuf2); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_createset_1_svc( - mdrpc_createset_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_createset_common(args, res, rqstp)); -} - -bool_t -mdrpc_createset_2_svc( - mdrpc_createset_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_createset_common( - &args->mdrpc_createset_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -bool_t -mdrpc_mncreateset_common( - mdrpc_mncreateset_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - char stringbuf1[MAXPATHLEN]; - char stringbuf2[MAXPATHLEN]; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* create the arguments for the symlink() and unlink() calls */ - (void) snprintf(stringbuf2, sizeof (stringbuf2), "/dev/md/%s", - args->sp->setname); - (void) snprintf(stringbuf1, sizeof (stringbuf1), "shared/%d", - args->sp->setno); - - /* - * Since we already verified that the setname was OK, make sure to - * cleanup before proceeding. - */ - if (unlink(stringbuf2) == -1) { - if (errno != ENOENT) { - (void) mdsyserror(ep, errno, stringbuf2); - return (TRUE); - } - } - - /* create the set */ - mncreateset(args->sp, args->nodelist, args->timestamp, args->genid, - args->master_nodenm, args->master_nodeid, ep); - - if (! mdisok(ep)) { - return (TRUE); - } - - /* create the symlink */ - if (symlink(stringbuf1, stringbuf2) == -1) - (void) mdsyserror(ep, errno, stringbuf2); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_mncreateset_2_svc( - mdrpc_mncreateset_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_mncreateset_common( - &args->mdrpc_mncreateset_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -del_drv_sidenms( - mdsetname_t *sp, - int version, /* RPC version of calling routine */ - md_error_t *ep -) -{ - md_set_record *sr; - md_drive_desc *dd, *p; - mddrivename_t *dn; - mdsetname_t *local_sp; - int i; - int rb_mode = 0; - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) - return; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - /* - * In the multi-node diskset, there are no diskset - * entries in the local set for other nodes, so there's - * nothing to do. - */ - free_sr(sr); - return; - } - - if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), - ep)) == NULL) { - if (! mdisdserror(ep, MDE_DS_HOSTNOSIDE)) { - metaflushsetname(sp); - if (! mdisok(ep)) { - free_sr(sr); - return; - } - /* we are supposed to have drives!!!! */ - assert(0); - } - rb_mode = 1; - mdclrerror(ep); - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip empty sides of the diskset */ - if (sr->sr_nodes[i][0] == '\0') - continue; - dd = metaget_drivedesc_sideno(sp, i, - (MD_BASICNAME_OK | PRINT_FAST), ep); - /* Got dd, get out of loop */ - if (dd != NULL) - break; - - /* some error occurred, get out of loop */ - if (! mdisok(ep)) - break; - } - /* - * At this point, we have one of three possibilities: - * 1) dd != NULL (we have found drives using an alternate - * side.) - * 2) dd == NULL (no drives) && mdisok(ep) : assert(0) - * 3) dd == NULL (no drives) && ! mdisok(ep) : return - * error information to caller. - */ - if (dd == NULL) { - metaflushsetname(sp); - if (! mdisok(ep)) { - free_sr(sr); - return; - } - /* we are supposed to have drives!!!! */ - assert(0); - } - } - - /* - * Let's run through each drive descriptor, and delete the - * sidename for all sides that are not in the sr_nodes array. - * We will ignore errors, cause the empty side may not - * have had any names to begin with. - */ - for (p = dd; p != NULL; p = p->dd_next) { - dn = p->dd_dnp; - - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip existing sides of the diskset */ - if (!rb_mode && sr->sr_nodes[i][0] != '\0') - continue; - /* An empty side, delete the sidename */ - if (del_name(local_sp, i+SKEW, - dn->side_names_key, ep)) { - if (!mdissyserror(ep, ENOENT)) { - free_sr(sr); - return; - } - mdclrerror(ep); - } - } - } - free_sr(sr); - metaflushsetname(sp); -} - -/* - * delete 1 or more sidenames per drive desc, from the local namespace - */ -bool_t -mdrpc_del_drv_sidenms_common( - mdrpc_sp_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp, /* RPC stuff */ - int version /* RPC version */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - del_drv_sidenms(args->sp, version, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_del_drv_sidenms_1_svc( - mdrpc_sp_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - /* Pass RPC version (METAD_VERSION) to common routine */ - return (mdrpc_del_drv_sidenms_common(args, res, rqstp, METAD_VERSION)); -} - -bool_t -mdrpc_del_drv_sidenms_2_svc( - mdrpc_sp_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* Pass RPC version (METAD_VERSION_DEVID) to common routine */ - return (mdrpc_del_drv_sidenms_common( - &args->mdrpc_sp_2_args_u.rev1, res, - rqstp, METAD_VERSION_DEVID)); - default: - return (FALSE); - } -} - -static int -del_sidenamelist( - md_set_record *sr, - mddrivename_t *dn, - md_error_t *ep -) -{ - mdsidenames_t *sn; - mdsetname_t *local_sp; - md_mnset_record *mnsr; - md_mnnode_record *nr; - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) - return (-1); - - for (sn = dn->side_names; sn != NULL; sn = sn->next) - if (MD_MNSET_REC(sr)) { - mnsr = (struct md_mnset_record *)sr; - /* - * Only delete side name entries for this node - * on a multi-node diskset. - */ - nr = mnsr->sr_nodechain; - while (nr) { - if (nr->nr_nodeid == sn->sideno) { - if (del_name(local_sp, sn->sideno, - dn->side_names_key, ep) == -1) - mdclrerror(ep); /* ignore err */ - break; - } - nr = nr->nr_next; - } - } else { - if (del_name(local_sp, sn->sideno+SKEW, - dn->side_names_key, ep) == -1) - mdclrerror(ep); /* ignore errors */ - } - - dn->side_names_key = MD_KEYBAD; - return (0); -} - -static void -deldrvs( - char *setname, - md_drive_desc *dd, - md_error_t *ep -) -{ - mdsetname_t *sp; - md_set_record *sr; - md_drive_record *dr; - mddb_userreq_t req; - md_drive_desc *p; - mddrivename_t *dn, *dn1; - side_t sideno; - int i; - int rb_mode = 0; - mdname_t *np; - md_dev64_t dev; - md_error_t xep = mdnullerror; - ddi_devid_t devid_remote = NULL; - ddi_devid_t devid_local = NULL; - int devid_same = -1; - int using_devid = 0; - md_mnnode_record *nr; - md_mnset_record *mnsr; - - if ((sp = metasetname(setname, ep)) == NULL) - return; - - metaflushsetname(sp); - - if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) { - if (! mdisdserror(ep, MDE_DS_HOSTNOSIDE)) - return; - mdclrerror(ep); - /* - * The set record is incomplete, so we need to make note - * here so that we can do some special handling later. - */ - rb_mode = 1; - } - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - if (dd->dd_dnp == NULL) - return; - - /* - * The system is either all devid or all - * non-devid so we determine this by looking - * at the first item in the list. - * - * For did disks, the dd_dnp->devid is a valid pointer which - * points to a '' string of devid. We need to check this - * before set the using_devid. - */ - if ((dd->dd_dnp->devid != NULL) && (dd->dd_dnp->devid[0] != '\0') && - (!(MD_MNSET_REC(sr)))) - using_devid = 1; - - for (p = dd; p != NULL; p = p->dd_next) { - dn = p->dd_dnp; - devid_remote = NULL; - - if (dn->devid != NULL && (strlen(dn->devid) != 0) && - using_devid) { - /* - * We have a devid so use it - */ - (void) devid_str_decode(dn->devid, &devid_remote, NULL); - } - - /* check to make sure using_devid agrees with reality... */ - if ((using_devid == 1) && (devid_remote == NULL)) { - /* something went really wrong. Can't process */ - (void) mddserror(ep, MDE_DS_INVALIDDEVID, sp->setno, - mynode(), dn->cname, sp->setname); - return; - } - - for (dr = sr->sr_drivechain; dr; dr = dr->dr_next) { - devid_same = -1; - - if (! rb_mode) { - dn1 = metadrivename_withdrkey(sp, sideno, - dr->dr_key, MD_BASICNAME_OK, ep); - if (dn1 == NULL) { - free_sr(sr); - if (devid_remote) - devid_free(devid_remote); - return; - } - } else { - /* - * Handle special case here where sidenames - * from other hosts for this drive may be - * in the local mddb, but there is no - * sidename entry for this host for this drive. - * This could have happened if the node - * panic'd between the 2 operations when - * adding this node to the set. - * So, delete all sidename entries for this - * drive. - */ - if (MD_MNSET_REC(sr)) { - mnsr = (struct md_mnset_record *)sr; - nr = mnsr->sr_nodechain; - while (nr) { - /* We delete all dr sides */ - dn1 = metadrivename_withdrkey( - sp, nr->nr_nodeid, - dr->dr_key, - MD_BASICNAME_OK, ep); - - /* if we do, get out of loop */ - if (dn1 != NULL) - break; - - /* save error for later */ - (void) mdstealerror(&xep, ep); - - mdclrerror(ep); - - nr = nr->nr_next; - } - } else { - /* - * Handle special case here - * for traditional diskset - */ - for (i = 0; i < MD_MAXSIDES; i++) { - /* We delete all dr sides */ - dn1 = metadrivename_withdrkey( - sp, i, dr->dr_key, - MD_BASICNAME_OK, ep); - - /* if we do, get out of loop */ - if (dn1 != NULL) - break; - - /* save error for later */ - (void) mdstealerror(&xep, ep); - - mdclrerror(ep); - } - } - - if (dn1 == NULL) { - (void) mdstealerror(ep, &xep); - free_sr(sr); - if (devid_remote) - devid_free(devid_remote); - return; - } - - if (!using_devid) - mdclrerror(ep); - } - - if (dn1->devid != NULL && using_devid) { - if (devid_str_decode(dn1->devid, &devid_local, - NULL) == 0) { - devid_same = devid_compare(devid_remote, - devid_local); - devid_free(devid_local); - } - } - - /* - * Has the required disk been found - either the devids - * match if devid are being used or the actual name of - * the disk matches. - */ - if ((using_devid && devid_same == 0) || - (!using_devid && - strcmp(dn->cname, dn1->cname) == 0)) { - uint_t rep_slice; - - dev = NODEV64; - np = NULL; - if (meta_replicaslice(dn1, - &rep_slice, &xep) == 0) { - np = metaslicename(dn1, - rep_slice, &xep); - } - - if (np != NULL) - dev = np->dev; - else - mdclrerror(&xep); - break; - } - } - - if (dr) { - (void) memset(&req, 0, sizeof (req)); - METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid) - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) - != 0) { - (void) mdstealerror(ep, &req.ur_mde); - if (devid_remote) - devid_free(devid_remote); - free_sr(sr); - return; - } - - dr_cache_del(sr, dr->dr_selfid); - - if (del_sidenamelist(sr, dn1, ep) == -1) { - goto out; - } - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE, - sr->sr_setno, dev); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE, - MD_LOCAL_SET, dev); - - continue; - } - - if (devid_remote) - devid_free(devid_remote); - } - -out: - commitset(sr, TRUE, ep); - - free_sr(sr); -} - -/* - * delete 1 or more drive records from a host. - */ -bool_t -mdrpc_deldrvs_common( - mdrpc_drives_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - deldrvs(args->sp->setname, args->drivedescs, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_deldrvs_1_svc( - mdrpc_drives_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_drives_2_args_r1 v2_args; - - /* allocate memory */ - alloc_newdrvdesc(args->drivedescs, &v2_args.drivedescs); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.cl_sk = args->cl_sk; - v2_args.sp = args->sp; - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvdesc_old2new(args->drivedescs, v2_args.drivedescs); - v2_args.timestamp = args->timestamp; - v2_args.genid = args->genid; - - retval = mdrpc_deldrvs_common(&v2_args, res, rqstp); - - free_newdrvdesc(v2_args.drivedescs); - - return (retval); -} - -bool_t -mdrpc_deldrvs_2_svc( - mdrpc_drives_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_deldrvs_common( - &args->mdrpc_drives_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -delhosts( - char *setname, - int node_c, - char **node_v, - int version, /* RPC version of calling routine */ - md_error_t *ep -) -{ - mddb_userreq_t req; - md_set_record *sr; - int i, j; - md_mnset_record *mnsr; - md_mnnode_record *nr; - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - for (i = 0; i < node_c; i++) { - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - mnsr = (struct md_mnset_record *)sr; - nr = mnsr->sr_nodechain; - while (nr) { - if (strcmp(nr->nr_nodename, node_v[i]) == 0) { - SE_NOTIFY(EC_SVM_CONFIG, - ESC_SVM_REMOVE, SVM_TAG_HOST, - sr->sr_setno, nr->nr_nodeid); - (void) memset(&req, '\0', sizeof (req)); - METAD_SETUP_NR(MD_DB_DELETE, - nr->nr_selfid); - if (metaioctl(MD_DB_USERREQ, &req, - &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, - &req.ur_mde); - free_sr(sr); - return; - } - mnnr_cache_del(mnsr, nr->nr_selfid); - break; - } - nr = nr->nr_next; - } - } else { - for (j = 0; j < MD_MAXSIDES; j++) { - if (sr->sr_nodes[j][0] == '\0') - continue; - if (strcmp(sr->sr_nodes[j], node_v[i]) != 0) - continue; - (void) memset(sr->sr_nodes[j], '\0', - sizeof (sr->sr_nodes[j])); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, - SVM_TAG_HOST, sr->sr_setno, j); - break; - } - } - } - - (void) memset(&req, '\0', sizeof (req)); - METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - req.ur_size = sizeof (*mnsr); - } else { - req.ur_size = sizeof (*sr); - } - req.ur_data = (uintptr_t)sr; - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - free_sr(sr); - return; - } - - commitset(sr, TRUE, ep); - free_sr(sr); -} - -/* - * delete 1 or more a hosts from a set. - */ -bool_t -mdrpc_delhosts_common( - mdrpc_host_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp, /* RPC stuff */ - int version /* RPC version */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - delhosts(args->sp->setname, args->hosts.hosts_len, - args->hosts.hosts_val, version, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_delhosts_1_svc( - mdrpc_host_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - /* Pass RPC version (METAD_VERSION) to common routine */ - return (mdrpc_delhosts_common(args, res, rqstp, METAD_VERSION)); -} - -bool_t -mdrpc_delhosts_2_svc( - mdrpc_host_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* Pass RPC version (METAD_VERSION_DEVID) to common routine */ - return (mdrpc_delhosts_common( - &args->mdrpc_host_2_args_u.rev1, res, - rqstp, METAD_VERSION_DEVID)); - default: - return (FALSE); - } -} - -/* - * delete a set. - */ -bool_t -mdrpc_delset_common( - mdrpc_sp_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - s_delset(args->sp->setname, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_delset_1_svc( - mdrpc_sp_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_delset_common(args, res, rqstp)); -} - -bool_t -mdrpc_delset_2_svc( - mdrpc_sp_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_delset_common( - &args->mdrpc_sp_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * return device info - */ -static void -devinfo( - mdsetname_t *sp, - mddrivename_t *dp, - mdrpc_devinfo_2_res *res, - md_error_t *ep -) -{ - mdname_t *np, *real_np; - - if ((np = metaslicename(dp, MD_SLICE0, ep)) == NULL) - return; - - if ((real_np = metaname(&sp, np->bname, LOGICAL_DEVICE, ep)) == NULL) - return; - - res->dev = real_np->dev; - (void) getdevstamp(dp, (long *)&res->vtime, ep); - res->enc_devid = meta_get_devid(np->rname); -} - -bool_t -mdrpc_devinfo_common( - mdrpc_devinfo_2_args_r1 *args, - mdrpc_devinfo_2_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int slice; - mdname_t *np; - mddrivename_t *dnp = args->drivenamep; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* - * fix all the drivenamep's in the mdname_t's to - * point to the right place. - */ - for (slice = 0; (slice < dnp->parts.parts_len); ++slice) { - if ((np = metaslicename(dnp, slice, ep)) == NULL) - return (TRUE); - np->drivenamep = dnp; - } - - /* doit */ - devinfo(args->sp, dnp, res, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_devinfo_1_svc( - mdrpc_devinfo_args *args, - mdrpc_devinfo_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_devinfo_2_args_r1 v2_args; - mdrpc_devinfo_2_res v2_res; - - /* allocate memory */ - v2_args.drivenamep = Zalloc(sizeof (mddrivename_t)); - v2_args.drivenamep->parts.parts_val = - Zalloc(sizeof (mdname_t) * args->drivenamep->parts.parts_len); - (void) memset(res, 0, sizeof (*res)); - - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvname_old2new(args->drivenamep, v2_args.drivenamep); - retval = mdrpc_devinfo_common(&v2_args, &v2_res, rqstp); - - /* - * Fill in the result appropriately. - * Since dev_t's for version 2 are 64-bit, - * we need to convert them to 32-bit for version 1. - */ - res->dev = meta_cmpldev(v2_res.dev); - res->vtime = v2_res.vtime; - res->status = v2_res.status; - - free(v2_args.drivenamep); - free(v2_args.drivenamep->parts.parts_val); - - return (retval); -} - -bool_t -mdrpc_devinfo_2_svc( - mdrpc_devinfo_2_args *args, - mdrpc_devinfo_2_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_devinfo_common( - &args->mdrpc_devinfo_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * return device id - */ -static void -mdrpc_get_devid( - mdsetname_t *sp, - mddrivename_t *dp, - mdrpc_devid_res *res, - md_error_t *ep -) -{ - mdname_t *np; - - if ((np = metaslicename(dp, MD_SLICE0, ep)) == NULL) - return; - - if (metaname(&sp, np->bname, LOGICAL_DEVICE, ep) == NULL) - return; - - res->enc_devid = meta_get_devid(np->rname); -} - -bool_t -mdrpc_devid_2_svc( - mdrpc_devid_2_args *args, - mdrpc_devid_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int slice; - mdname_t *np; - mddrivename_t *dnp; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - dnp = (&(args->mdrpc_devid_2_args_u.rev1))->drivenamep; - break; - default: - return (FALSE); - } - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* - * fix all the drivenamep's in the mdname_t's to - * point to the right place. - */ - for (slice = 0; (slice < dnp->parts.parts_len); ++slice) { - if ((np = metaslicename(dnp, slice, ep)) == NULL) - return (TRUE); - np->drivenamep = dnp; - } - - /* doit */ - mdrpc_get_devid((&(args->mdrpc_devid_2_args_u.rev1))->sp, dnp, res, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * This routine should not be called for a multi-node diskset. - * - * The devid support is disabled for MN diskset so this routine - * will not be called if the set is MN diskset. The check has - * been done early in meta_getnextside_devinfo. However this - * routine will be called when the devid support for MN set is - * enabled and check is removed. - */ -bool_t -mdrpc_devinfo_by_devid_2_svc( - mdrpc_devidstr_args *args, - mdrpc_devinfo_2_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - - char *devidstr = args->enc_devid; - md_error_t *ep = &res->status; - ddi_devid_t devid; - char *minor_name = NULL; - int ret = 0; - int err; - devid_nmlist_t *disklist = NULL; - int op_mode = R_OK; - mdname_t *np; - mdsetname_t *sp = args->sp; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - if (devid_str_decode(devidstr, &devid, &minor_name) != 0) - return (TRUE); - - /* - * if we do not have a minor name then look for a character device. - * This is because the caller (checkdrive_onnode) expects a character - * device to be returned. The other client of this interface is - * meta_getnextside_devinfo and this supplies a minor name. - */ - if (minor_name == NULL) { - ret = meta_deviceid_to_nmlist("/dev", devid, - DEVID_MINOR_NAME_ALL_CHR, &disklist); - } else { - ret = meta_deviceid_to_nmlist("/dev", devid, minor_name, - &disklist); - devid_str_free(minor_name); - } - - devid_free(devid); - if (ret != 0) { - res->dev = NODEV64; - devid_free_nmlist(disklist); - return (TRUE); - } - - np = metaname(&sp, disklist[0].devname, LOGICAL_DEVICE, ep); - if (np != NULL) { - mdcinfo_t *cinfo; - if ((cinfo = metagetcinfo(np, ep)) != NULL) { - res->drivername = Strdup(cinfo->dname); - } - } - - res->dev = meta_expldev(disklist[0].dev); - res->devname = strdup(disklist[0].devname); - - devid_free_nmlist(disklist); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * This routine should not be called for a multi-node diskset. - * - * The devid support is disabled for MN diskset so this routine - * will not be called if the set is MN diskset. The check has - * been done early in meta_getnextside_devinfo. However this - * routine will be called when the devid support for MN set is - * enabled and check is removed. - * - * This function will return the device info attempting to use - * both the passed in devid and device name. This is to deal - * with systems that use multi-path disks but not running mpxio. - * In this situation meta_deviceid_to_nmlist will return multiple - * devices. The orig_devname is used to disambiguate. - * - */ -bool_t -mdrpc_devinfo_by_devid_name_2_svc( - mdrpc_devid_name_2_args *args, - mdrpc_devinfo_2_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - - char *devidstr; - char *orig_devname; - md_error_t *ep = &res->status; - ddi_devid_t devid; - char *minor_name = NULL; - int ret = 0; - int err; - int i; - devid_nmlist_t *disklist = NULL; - int op_mode = R_OK; - mdname_t *np; - mdsetname_t *sp; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - sp = (&(args->mdrpc_devid_name_2_args_u.rev1))->sp; - devidstr = (&(args->mdrpc_devid_name_2_args_u.rev1))->enc_devid; - orig_devname = - (&(args->mdrpc_devid_name_2_args_u.rev1))->orig_devname; - break; - default: - return (FALSE); - } - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - if (devid_str_decode(devidstr, &devid, &minor_name) != 0) - return (TRUE); - - /* - * if we do not have a minor name then look for a character device. - * This is because the caller (checkdrive_onnode) expects a character - * device to be returned. The other client of this interface is - * meta_getnextside_devinfo and this supplies a minor name. - */ - if (minor_name == NULL) { - ret = meta_deviceid_to_nmlist("/dev", devid, - DEVID_MINOR_NAME_ALL_CHR, &disklist); - } else { - ret = meta_deviceid_to_nmlist("/dev", devid, minor_name, - &disklist); - devid_str_free(minor_name); - } - - devid_free(devid); - if (ret != 0) { - res->dev = NODEV64; - devid_free_nmlist(disklist); - return (TRUE); - } - - /* attempt to match to the device name on the originating node */ - for (i = 0; disklist[i].dev != NODEV; i++) { - if (strncmp(orig_devname, disklist[i].devname, - strlen(disklist[i].devname)) == 0) - break; - } - - /* if it's not found then use the first disk in the list */ - if (disklist[i].dev == NODEV) - i = 0; - - np = metaname(&sp, disklist[i].devname, LOGICAL_DEVICE, ep); - if (np != NULL) { - mdcinfo_t *cinfo; - if ((cinfo = metagetcinfo(np, ep)) != NULL) { - res->drivername = Strdup(cinfo->dname); - } - } - - res->dev = meta_expldev(disklist[i].dev); - res->devname = strdup(disklist[i].devname); - - devid_free_nmlist(disklist); - - err = svc_fini(ep); - - return (TRUE); -} - -static void -drvused(mdsetname_t *sp, mddrivename_t *dnp, md_error_t *ep) -{ - if (meta_check_drivemounted(sp, dnp, ep)) - return; - - if (meta_check_driveswapped(sp, dnp, ep)) - return; - - if (meta_check_drive_inuse(metasetname(MD_LOCAL_NAME, ep), dnp, - TRUE, ep)) - return; - - (void) meta_check_driveinset(sp, dnp, ep); -} - -/* - * determine if a device is in use. - */ -bool_t -mdrpc_drvused_common( - mdrpc_drvused_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int slice; - mdname_t *np; - mddrivename_t *dnp = args->drivenamep; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - if (dnp == NULL) { - /* no drive pointer specified */ - return (TRUE); - } - /* - * fix all the drivenamep's in the mdname_t's to - * point to the right place. - */ - for (slice = 0; (slice < dnp->parts.parts_len); ++slice) { - if ((np = metaslicename(dnp, slice, ep)) == NULL) - return (TRUE); - np->drivenamep = dnp; - } - - /* doit */ - drvused(args->sp, dnp, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_drvused_1_svc( - mdrpc_drvused_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_drvused_2_args_r1 v2_args; - - /* allocate memory */ - v2_args.drivenamep = Zalloc(sizeof (mddrivename_t)); - v2_args.drivenamep->parts.parts_val = - Zalloc(sizeof (mdname_t) * args->drivenamep->parts.parts_len); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.sp = args->sp; - v2_args.cl_sk = args->cl_sk; - - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvname_old2new(args->drivenamep, v2_args.drivenamep); - retval = mdrpc_drvused_common(&v2_args, res, rqstp); - - free(v2_args.drivenamep); - free(v2_args.drivenamep->parts.parts_val); - - return (retval); -} - -bool_t -mdrpc_drvused_2_svc( - mdrpc_drvused_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_drvused_common( - &args->mdrpc_drvused_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * return a set records selected by name or number. - */ -bool_t -mdrpc_getset_common( - mdrpc_getset_args *args, - mdrpc_getset_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* Don't have a setno, so we don't check the lock */ - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - if (args->setname && *args->setname) - res->sr = setdup(getsetbyname(args->setname, ep)); - else if (args->setno > 0) - res->sr = setdup(getsetbynum(args->setno, ep)); - else - res->sr = NULL; - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_getset_1_svc( - mdrpc_getset_args *args, - mdrpc_getset_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_getset_common(args, res, rqstp)); -} - -bool_t -mdrpc_getset_2_svc( - mdrpc_getset_2_args *args, - mdrpc_getset_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_getset_common( - &args->mdrpc_getset_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * return a MN set record selected by name or number. - */ -bool_t -mdrpc_mngetset_common( - mdrpc_getset_args *args, - mdrpc_mngetset_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - md_set_record *sr = NULL; - md_mnset_record *mnsr = NULL; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* Don't have a setno, so we don't check the lock */ - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - res->mnsr = NULL; - if (args->setname && *args->setname) - sr = getsetbyname(args->setname, ep); - else if (args->setno > 0) - sr = getsetbynum(args->setno, ep); - - if ((sr) && (MD_MNSET_REC(sr))) { - mnsr = (struct md_mnset_record *)sr; - res->mnsr = mnsetdup(mnsr); - } - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_mngetset_2_svc( - mdrpc_getset_2_args *args, - mdrpc_mngetset_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_mngetset_common( - &args->mdrpc_getset_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -upd_setmaster( - mdsetname_t *sp, - md_node_nm_t master_nodenm, - int master_nodeid, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - md_set_record *sr; - md_mnset_record *mnsr; - mddb_setmaster_config_t sm; - - if ((local_sp = metasetname(sp->setname, ep)) == NULL) - return; - - metaflushsetname(local_sp); - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - if (MD_MNSET_REC(sr)) { - mnsr = (struct md_mnset_record *)sr; - (void) strlcpy(mnsr->sr_master_nodenm, master_nodenm, - MD_MAX_NODENAME); - mnsr->sr_master_nodeid = master_nodeid; - if (master_nodeid != 0) { - (void) memset(&sm, 0, sizeof (sm)); - sm.c_setno = sp->setno; - /* Use magic to help protect ioctl against attack. */ - sm.c_magic = MDDB_SETMASTER_MAGIC; - if (strcmp(master_nodenm, mynode()) == 0) { - sm.c_current_host_master = 1; - } else { - sm.c_current_host_master = 0; - } - (void) metaioctl(MD_SETMASTER, &sm, &sm.c_mde, NULL); - mdclrerror(&sm.c_mde); - } - } - -out: - commitset(sr, FALSE, ep); - free_sr(sr); -} - -/* - * set the master and nodeid in node record - */ -bool_t -mdrpc_mnsetmaster_common( - mdrpc_mnsetmaster_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - upd_setmaster(args->sp, args->master_nodenm, args->master_nodeid, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_mnsetmaster_2_svc( - mdrpc_mnsetmaster_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_mnsetmaster_common( - &args->mdrpc_mnsetmaster_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * Join this node to the diskset. - * Pass stale_flag information to snarf_set so that snarf code - * can choose a STALE or non-STALE state when starting the set. - * If master is STALE, any joining node will join a stale set regardless - * of the number of accessible mddbs. Also, if master is at 50% - * accessible replicas and is in the TOOFEW state, don't mark newly - * joining node as STALE; mark it TOOFEW instead. - */ -static void -joinset( - mdsetname_t *sp, - int flags, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - md_drive_desc *mydd; - bool_t stale_bool; - mddb_block_parm_t mbp; - md_error_t xep = mdnullerror; - - if ((local_sp = metasetname(sp->setname, ep)) == NULL) - return; - - /* - * Start mddoors daemon here. - * mddoors itself takes care there will be - * only one instance running, so starting it twice won't hurt - */ - (void) pclose(popen(MDDOORS, "w")); - - /* - * Get latest copy of data. If a drive was just added causing - * nodes to get joined - this drive won't be in the local - * name caches drive list yet. - */ - metaflushsetname(local_sp); - - mydd = metaget_drivedesc(local_sp, (MD_BASICNAME_OK | PRINT_FAST), ep); - if (mydd) { - /* - * Causes mddbs to be loaded into the kernel. - * Set the force flag so that replica locations can be loaded - * into the kernel even if a mediator node was unavailable. - * This allows a node to join an MO diskset when there are - * sufficient replicas available, but a mediator node - * in unavailable. - */ - if (setup_db_bydd(local_sp, mydd, TRUE, ep) == -1) { - /* If ep isn't set for some reason, set it */ - if (mdisok(ep)) { - (void) mdmddberror(ep, MDE_DB_NOTNOW, - (minor_t)NODEV64, sp->setno, 0, NULL); - } - return; - } - - if (flags & MNSET_IS_STALE) - stale_bool = TRUE; - else - stale_bool = FALSE; - - /* - * Snarf the set. No failure has occurred if STALE or - * ACCOK error was set. Otherwise, fail the call setting - * a generic error if no error was already set. - * - * STALE means that set has < 50% mddbs. - * ACCOK means that the mediator provided an extra vote. - */ - if (snarf_set(local_sp, stale_bool, ep) != 0) { - if (!(mdismddberror(ep, MDE_DB_STALE)) && - !(mdismddberror(ep, MDE_DB_ACCOK))) { - return; - } else if (mdisok(ep)) { - /* If snarf failed, but no error set - set it */ - (void) mdmddberror(ep, MDE_DB_NOTNOW, - (minor_t)NODEV64, sp->setno, 0, NULL); - return; - } - } - - /* - * If node is joining during reconfig cycle, then - * set mddb_parse to be in blocked state so that - * mddb reparse messages are not generated until - * the commd has been resumed later in the reconfig - * cycle. - */ - if (flags & MNSET_IN_RECONFIG) { - (void) memset(&mbp, 0, sizeof (mbp)); - if (s_ownset(sp->setno, &xep) == MD_SETOWNER_YES) { - (void) memset(&mbp, 0, sizeof (mbp)); - mbp.c_setno = local_sp->setno; - mbp.c_blk_flags = MDDB_BLOCK_PARSE; - if (metaioctl(MD_MN_MDDB_BLOCK, &mbp, - &mbp.c_mde, NULL)) { - (void) mdstealerror(&xep, &mbp.c_mde); - mde_perror(ep, gettext( - "Could not block set %s"), - sp->setname); - return; - } - } - /* - * If s_ownset fails and snarf_set succeeded, - * then can steal the ownset failure information - * and store it into ep. If snarf_set failed, - * don't overwrite critical ep information even - * if s_ownset failed. - */ - if (!mdisok(&xep)) { - /* - * If snarf_set succeeded or snarf_set failed - * with MDE_DB_ACCOK (which is set if the - * mediator provided the extra vote) then - * steal the xep failure information and put - * into ep. - */ - if (mdisok(ep) || - mdismddberror(ep, MDE_DB_ACCOK)) { - (void) mdstealerror(ep, &xep); - } - } - } - } -} - -/* - * Have this node join the set. - * This is called when a node has been - * added to a MN diskset that has drives. - * Also, called when a node is an alive - * member of a MN diskset and the first - * drive has been added. - */ -bool_t -mdrpc_joinset_common( - mdrpc_sp_flags_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* - * During reconfig, joinset can happen without - * locking first. Turn off reconfig flag before calling - * joinset. - */ - if (!(args->flags & MNSET_IN_RECONFIG)) { - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - } - - /* doit */ - joinset(args->sp, args->flags, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_joinset_2_svc( - mdrpc_sp_flags_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_joinset_common( - &args->mdrpc_sp_flags_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -withdrawset( - mdsetname_t *sp, - md_error_t *ep -) -{ - mdsetname_t *my_sp; - - if ((my_sp = metasetname(sp->setname, ep)) == NULL) - return; - - (void) halt_set(my_sp, ep); -} - -/* - * Have this node withdraw from set. - * In response to a failure that occurred - * on the client after a joinset. - */ -bool_t -mdrpc_withdrawset_common( - mdrpc_sp_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - withdrawset(args->sp, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_withdrawset_2_svc( - mdrpc_sp_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_withdrawset_common( - &args->mdrpc_sp_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static mhd_mhiargs_t * -gtimeout(mdsetname_t *sp, md_error_t *ep) -{ - md_set_record *sr; - mhd_mhiargs_t *mhiargs; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return (NULL); - - mhiargs = Zalloc(sizeof (*mhiargs)); - *mhiargs = sr->sr_mhiargs; - - free_sr(sr); - return (mhiargs); -} - -/* - * Get the MH timeout values for this set. - */ -bool_t -mdrpc_gtimeout_common( - mdrpc_sp_args *args, - mdrpc_gtimeout_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - res->mhiargsp = gtimeout(args->sp, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_gtimeout_1_svc( - mdrpc_sp_args *args, - mdrpc_gtimeout_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_gtimeout_common(args, res, rqstp)); -} - -bool_t -mdrpc_gtimeout_2_svc( - mdrpc_sp_2_args *args, - mdrpc_gtimeout_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_gtimeout_common( - &args->mdrpc_sp_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * return the official host name for the callee - */ -/*ARGSUSED*/ -bool_t -mdrpc_hostname_common( - mdrpc_null_args *args, - mdrpc_hostname_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - res->hostname = Strdup(mynode()); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_hostname_1_svc( - mdrpc_null_args *args, - mdrpc_hostname_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_hostname_common(args, res, rqstp)); -} - -bool_t -mdrpc_hostname_2_svc( - mdrpc_null_args *args, - mdrpc_hostname_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_hostname_common(args, res, rqstp)); -} - -/* - * return a response - */ -/*ARGSUSED*/ -bool_t -mdrpc_nullproc_common( - void *args, - md_error_t *ep, - struct svc_req *rqstp /* RPC stuff */ -) -{ - *ep = mdnullerror; - /* do nothing */ - return (TRUE); -} - -bool_t -mdrpc_nullproc_1_svc( - void *args, - md_error_t *ep, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_nullproc_common(args, ep, rqstp)); -} - -bool_t -mdrpc_nullproc_2_svc( - void *args, - md_error_t *ep, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_nullproc_common(args, ep, rqstp)); -} - -/* - * determine if the caller owns the set. - */ -bool_t -mdrpc_ownset_common( - mdrpc_sp_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - if (s_ownset(args->sp->setno, ep)) - res->value = TRUE; - else - res->value = FALSE; - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_ownset_1_svc( - mdrpc_sp_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_ownset_common(args, res, rqstp)); -} - -bool_t -mdrpc_ownset_2_svc( - mdrpc_sp_2_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_ownset_common( - &args->mdrpc_sp_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static int -setnameok(char *setname, md_error_t *ep) -{ - int rval = 0; - struct stat statb; - md_set_record *sr = NULL; - char *setlink = NULL; - - setlink = Strdup("/dev/md/"); - setlink = Realloc(setlink, strlen(setlink) + strlen(setname) + 1); - (void) strcat(setlink, setname); - - if (lstat(setlink, &statb) == -1) { - /* - * If lstat() fails with ENOENT, setname is OK, if it - * fails for other than that, we fail the RPC - */ - if (errno == ENOENT) { - rval = 1; - goto out; - } - - (void) mdsyserror(ep, errno, setlink); - goto out; - } - - /* - * If the lstat() succeeded, then we see what type of object - * we are dealing with, if it is a symlink, we do some further - * checking, if it is not a symlink, then we return an - * indication that the set name is NOT acceptable. - */ - if (! S_ISLNK(statb.st_mode)) - goto out; - - /* - * We look up the setname to see if there is a set - * with that name, if there is, then we return - * an indication that the set name is NOT acceptable. - */ - if ((sr = getsetbyname(setname, ep)) != NULL) - goto out; - - if (! mdiserror(ep, MDE_NO_SET)) - goto out; - - mdclrerror(ep); - - rval = 1; -out: - if (sr != NULL) - free_sr(sr); - Free(setlink); - return (rval); -} - -/* - * Make sure the name of the set is OK. - */ -bool_t -mdrpc_setnameok_common( - mdrpc_sp_args *args, /* device name */ - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - res->value = setnameok(args->sp->setname, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_setnameok_1_svc( - mdrpc_sp_args *args, /* device name */ - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_setnameok_common(args, res, rqstp)); -} - -bool_t -mdrpc_setnameok_2_svc( - mdrpc_sp_2_args *args, /* device name */ - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_setnameok_common( - &args->mdrpc_sp_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * determine if the setnumber we want to share is in use. - */ -bool_t -mdrpc_setnumbusy_common( - mdrpc_setno_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - md_set_record *sr = NULL; - int err; - int op_mode = R_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - if ((sr = getsetbynum(args->setno, ep)) != NULL) { - res->value = TRUE; - free_sr(sr); - return (TRUE); - } - res->value = FALSE; - if (mdiserror(ep, MDE_NO_SET)) - mdclrerror(ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_setnumbusy_1_svc( - mdrpc_setno_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_setnumbusy_common(args, res, rqstp)); -} - -bool_t -mdrpc_setnumbusy_2_svc( - mdrpc_setno_2_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_setnumbusy_common( - &args->mdrpc_setno_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -stimeout( - mdsetname_t *sp, - mhd_mhiargs_t *mhiargsp, - int version, /* RPC version of calling routine */ - md_error_t *ep -) -{ - mddb_userreq_t req; - md_set_record *sr; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - sr->sr_mhiargs = *mhiargsp; - - (void) memset(&req, '\0', sizeof (req)); - - METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - req.ur_size = sizeof (struct md_mnset_record); - } else { - req.ur_size = sizeof (*sr); - } - req.ur_data = (uintptr_t)sr; - - /* - * Cluster nodename support - * Convert nodename -> nodeid - * Don't do this for MN disksets since we've already stored - * both the nodeid and name. - */ - if ((version == METAD_VERSION) || - ((version == METAD_VERSION_DEVID) && (!(MD_MNSET_REC(sr))))) - sdssc_cm_sr_nm2nid(sr); - - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - return; - } - - (void) memset(&req, '\0', sizeof (req)); - METAD_SETUP_SR(MD_DB_COMMIT_ONE, sr->sr_selfid) - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) - (void) mdstealerror(ep, &req.ur_mde); - - /* - * Cluster nodename support - * Convert nodeid -> nodename - * Don't do this for MN disksets since we've already stored - * both the nodeid and name. - */ - if ((version == METAD_VERSION) || - ((version == METAD_VERSION_DEVID) && (!(MD_MNSET_REC(sr))))) - sdssc_cm_sr_nid2nm(sr); - - free_sr(sr); -} - -/* - * Set MH ioctl timeout values. - */ -bool_t -mdrpc_stimeout_common( - mdrpc_stimeout_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp, /* RPC stuff */ - int version /* RPC version */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, NULL, ep)) - return (TRUE); - - /* doit */ - stimeout(args->sp, args->mhiargsp, version, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_stimeout_1_svc( - mdrpc_stimeout_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - /* Pass RPC version (METAD_VERSION) to common routine */ - return (mdrpc_stimeout_common(args, res, rqstp, METAD_VERSION)); -} - -bool_t -mdrpc_stimeout_2_svc( - mdrpc_stimeout_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* Pass RPC version (METAD_VERSION_DEVID) to common routine */ - return (mdrpc_stimeout_common( - &args->mdrpc_stimeout_2_args_u.rev1, res, - rqstp, METAD_VERSION_DEVID)); - default: - return (FALSE); - } -} - -static void -upd_dr_dbinfo( - mdsetname_t *sp, - md_drive_desc *dd, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - md_set_record *sr; - md_drive_record *dr; - md_drive_desc *p; - mddrivename_t *dn, *dn1; - ddi_devid_t devid_remote = NULL; - ddi_devid_t devid_local = NULL; - int devid_same = -1; - side_t sideno; - int using_devid = 0; - - if ((local_sp = metasetname(sp->setname, ep)) == NULL) - return; - - metaflushsetname(local_sp); - - if ((sideno = getmyside(local_sp, ep)) == MD_SIDEWILD) - return; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - if (dd->dd_dnp == NULL) - return; - - /* - * The system is either all devid or all - * non-devid so we determine this by looking - * at the first item in the list. - * - * For did disks, the dd_dnp->devid is a valid pointer which - * points to a '' string of devid. We need to check this - * before set the using_devid. - */ - if ((dd->dd_dnp->devid != NULL) && (dd->dd_dnp->devid[0] != '\0') && - (!(MD_MNSET_REC(sr)))) - using_devid = 1; - - for (p = dd; p != NULL; p = p->dd_next) { - dn = p->dd_dnp; - devid_remote = NULL; - - if (dn->devid != NULL && (strlen(dn->devid) != 0) && - using_devid) { - /* - * We have a devid so use it. - */ - (void) devid_str_decode(dn->devid, &devid_remote, NULL); - } - - /* check to make sure using_devid agrees with reality... */ - if ((using_devid == 1) && (devid_remote == NULL)) { - /* something went really wrong. Can't process */ - (void) mddserror(ep, MDE_DS_INVALIDDEVID, sp->setno, - mynode(), dn->cname, sp->setname); - return; - } - - for (dr = sr->sr_drivechain; dr; dr = dr->dr_next) { - devid_same = -1; - - dn1 = metadrivename_withdrkey(local_sp, sideno, - dr->dr_key, MD_BASICNAME_OK, ep); - - if (dn1 == NULL) { - if (devid_remote) - devid_free(devid_remote); - goto out; - } - - if (dn1->devid != NULL && using_devid) { - if (devid_str_decode(dn1->devid, &devid_local, - NULL) == 0) { - devid_same = devid_compare(devid_remote, - devid_local); - devid_free(devid_local); - } - } - - if (using_devid && devid_same == 0) - break; - - if (!using_devid && - strcmp(dn->cname, dn1->cname) == 0) - break; - } - - if (dr) { - /* Adjust the fields in the copy */ - dr->dr_dbcnt = p->dd_dbcnt; - dr->dr_dbsize = p->dd_dbsize; - } - if (devid_remote) - devid_free(devid_remote); - } - - -out: - commitset(sr, FALSE, ep); - free_sr(sr); -} - -/* - * update the database count and size field of drive records. - */ -bool_t -mdrpc_upd_dr_dbinfo_common( - mdrpc_drives_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - upd_dr_dbinfo(args->sp, args->drivedescs, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_upd_dr_dbinfo_1_svc( - mdrpc_drives_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_drives_2_args_r1 v2_args; - - /* allocate memory */ - alloc_newdrvdesc(args->drivedescs, &v2_args.drivedescs); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.cl_sk = args->cl_sk; - v2_args.sp = args->sp; - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvdesc_old2new(args->drivedescs, v2_args.drivedescs); - v2_args.timestamp = args->timestamp; - v2_args.genid = args->genid; - - retval = mdrpc_upd_dr_dbinfo_common(&v2_args, res, rqstp); - - free_newdrvdesc(v2_args.drivedescs); - - return (retval); -} - -bool_t -mdrpc_upd_dr_dbinfo_2_svc( - mdrpc_drives_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_upd_dr_dbinfo_common( - &args->mdrpc_drives_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -upd_dr_flags( - mdsetname_t *sp, - md_drive_desc *dd, - uint_t new_flags, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - md_set_record *sr; - md_drive_record *dr; - md_drive_desc *p; - mddrivename_t *dn, *dn1; - ddi_devid_t devid_remote = NULL; - ddi_devid_t devid_local = NULL; - int devid_same = -1; - side_t sideno; - int using_devid = 0; - - if ((local_sp = metasetname(sp->setname, ep)) == NULL) - return; - - metaflushsetname(local_sp); - - if ((sideno = getmyside(local_sp, ep)) == MD_SIDEWILD) - return; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - if (dd->dd_dnp == NULL) - return; - - /* - * The system is either all devid or all - * non-devid so we determine this by looking - * at the first item in the list. - * - * For did disks, the dd_dnp->devid is a valid pointer which - * points to a '' string of devid. We need to check this - * before set the using_devid. - */ - if ((dd->dd_dnp->devid != NULL) && (dd->dd_dnp->devid[0] != '\0') && - (!(MD_MNSET_REC(sr)))) - using_devid = 1; - - for (p = dd; p != NULL; p = p->dd_next) { - dn = p->dd_dnp; - devid_remote = NULL; - - if (dn->devid != NULL && (strlen(dn->devid) != 0) && - using_devid) { - /* - * We have a devid so use it. - */ - (void) devid_str_decode(dn->devid, &devid_remote, NULL); - } - - /* check to make sure using_devid agrees with reality... */ - if ((using_devid == 1) && (devid_remote == NULL)) { - /* something went really wrong. Can't process */ - (void) mddserror(ep, MDE_DS_INVALIDDEVID, sp->setno, - mynode(), dn->cname, sp->setname); - return; - } - - for (dr = sr->sr_drivechain; dr; dr = dr->dr_next) { - devid_same = -1; - - dn1 = metadrivename_withdrkey(local_sp, sideno, - dr->dr_key, MD_BASICNAME_OK, ep); - - if (dn1 == NULL) { - if (devid_remote) - devid_free(devid_remote); - goto out; - } - - if (dn1->devid != NULL && using_devid) { - if (devid_str_decode(dn1->devid, - &devid_local, NULL) == 0) { - devid_same = devid_compare(devid_remote, - devid_local); - devid_free(devid_local); - } - } - - if (using_devid && devid_same == 0) - break; - - if (!using_devid && - strcmp(dn->cname, dn1->cname) == 0) - break; - } - - if (dr) - dr->dr_flags = new_flags; - if (devid_remote) - devid_free(devid_remote); - } -out: - commitset(sr, TRUE, ep); - free_sr(sr); -} - -/* - * update the database count and size field of drive records. - */ -bool_t -mdrpc_upd_dr_flags_common( - mdrpc_upd_dr_flags_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - upd_dr_flags(args->sp, args->drivedescs, args->new_flags, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * version 1 of the remote procedure. This procedure is called if the - * client is running in version 1. We first convert version 1 arguments - * into version 2 arguments and then call the common remote procedure. - */ -bool_t -mdrpc_upd_dr_flags_1_svc( - mdrpc_upd_dr_flags_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - bool_t retval; - mdrpc_upd_dr_flags_2_args_r1 v2_args; - - /* allocate memory */ - alloc_newdrvdesc(args->drivedescs, &v2_args.drivedescs); - (void) memset(res, 0, sizeof (*res)); - - /* build args */ - v2_args.cl_sk = args->cl_sk; - v2_args.sp = args->sp; - /* convert v1 args to v2 (revision 1) args */ - meta_conv_drvdesc_old2new(args->drivedescs, v2_args.drivedescs); - v2_args.new_flags = args->new_flags; - - retval = mdrpc_upd_dr_flags_common(&v2_args, res, rqstp); - - free_newdrvdesc(v2_args.drivedescs); - - return (retval); -} - -bool_t -mdrpc_upd_dr_flags_2_svc( - mdrpc_upd_dr_flags_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_upd_dr_flags_common( - &args->mdrpc_upd_dr_flags_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -static void -upd_sr_flags( - mdsetname_t *sp, - uint_t new_flags, - md_error_t *ep -) -{ - md_set_record *sr; - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - sr->sr_flags = new_flags; - commitset(sr, TRUE, ep); - free_sr(sr); -} - -/* - * update the set record flags - */ -bool_t -mdrpc_upd_sr_flags_common( - mdrpc_upd_sr_flags_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - upd_sr_flags(args->sp, args->new_flags, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_upd_sr_flags_1_svc( - mdrpc_upd_sr_flags_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - return (mdrpc_upd_sr_flags_common(args, res, rqstp)); -} - -bool_t -mdrpc_upd_sr_flags_2_svc( - mdrpc_upd_sr_flags_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_upd_sr_flags_common( - &args->mdrpc_upd_sr_flags_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * upd_nr_flags updates the node records stored in this node's local mddb - * given a node desciptor list and an action. upd_nr_flags then commits - * the node records to the local mddb. - * - * nd - A linked list of node descriptors that describes the node records - * in this diskset on which the action applies. - * flag_action: action to be taken on node records that match the nd list. - * flag_action can be: - * MD_NR_JOIN: set OWN flag in node records - * MD_NR_WITHDRAW: reset OWN flag in node records - * MD_NR_OK: reset ADD flags and set OK flag in node records - * MD_NR_SET: set node record flags based on flags stored in nd - * - * Typically, the JOIN, WITHDRAW and OK flag_actions are used when setting - * all nodes in a diskset to JOIN (add first disk to set), WITHDRAW - * (remove last disk from set) or OK (after addition of host to set). - * - * The SET flag_action is typically used when nodelist contains all nodes - * in the diskset, but specific nodes have had flag changes. An example of - * this would be the join/withdraw of a specific node to/from the set. - * - * Ignore the MD_MN_NODE_RB_JOIN flag if set in node record flag. This - * flag is used by the client to recover in case of failure and should not - * be set in the node record flags. - */ -static void -upd_nr_flags( - mdsetname_t *sp, - md_mnnode_desc *nd, - uint_t flag_action, - md_error_t *ep -) -{ - mdsetname_t *local_sp; - md_set_record *sr; - md_mnset_record *mnsr; - md_mnnode_desc *ndp; - md_mnnode_record *nrp; - - if ((local_sp = metasetname(sp->setname, ep)) == NULL) - return; - - metaflushsetname(local_sp); - - if ((sr = getsetbyname(sp->setname, ep)) == NULL) - return; - - if (!(MD_MNSET_REC(sr))) { - return; - } - mnsr = (struct md_mnset_record *)sr; - - switch (flag_action) { - case MD_NR_JOIN: - case MD_NR_WITHDRAW: - case MD_NR_SET: - case MD_NR_OK: - case MD_NR_DEL: - break; - default: - return; - } - - for (ndp = nd; ndp != NULL; ndp = ndp->nd_next) { - /* Find matching node record for given node descriptor */ - for (nrp = mnsr->sr_nodechain; nrp != NULL; - nrp = nrp->nr_next) { - if (ndp->nd_nodeid == nrp->nr_nodeid) { - switch (flag_action) { - case MD_NR_JOIN: - nrp->nr_flags |= MD_MN_NODE_OWN; - break; - case MD_NR_WITHDRAW: - nrp->nr_flags &= ~MD_MN_NODE_OWN; - break; - case MD_NR_OK: - nrp->nr_flags &= - ~(MD_MN_NODE_ADD | MD_MN_NODE_DEL); - nrp->nr_flags |= MD_MN_NODE_OK; - break; - case MD_NR_DEL: - nrp->nr_flags &= - ~(MD_MN_NODE_OK | MD_MN_NODE_ADD); - nrp->nr_flags |= MD_MN_NODE_DEL; - break; - case MD_NR_SET: - /* Do not set RB_JOIN flag */ - nrp->nr_flags = - ndp->nd_flags & ~MD_MN_NODE_RB_JOIN; - break; - } - break; - } - } - } -out: - /* Don't increment set genid for node record flag update */ - commitset(sr, FALSE, ep); - free_sr(sr); -} - -/* - * init/fini wrapper around upd_nr_flags - */ -bool_t -mdrpc_upd_nr_flags_common( - mdrpc_upd_nr_flags_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* - * During reconfig, node record flags can be updated without - * locking first. - */ - if (!(args->flags & MNSET_IN_RECONFIG)) { - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - } - - /* doit */ - upd_nr_flags(args->sp, args->nodedescs, args->flag_action, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * update the node records using given flag action. - */ -bool_t -mdrpc_upd_nr_flags_2_svc( - mdrpc_upd_nr_flags_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_upd_nr_flags_common( - &args->mdrpc_upd_nr_flags_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -void -free_sk(md_setkey_t *skp) -{ - Free(skp->sk_setname); - Free(skp->sk_host); - Free(skp); -} - -void -del_sk(set_t setno) -{ - md_setkey_t *skp; - md_setkey_t *tskp; - - for (skp = tskp = my_svc_sk; skp; tskp = skp, skp = skp->sk_next) { - if (setno == skp->sk_setno) { - if (skp == my_svc_sk) - my_svc_sk = skp->sk_next; - else - tskp->sk_next = skp->sk_next; - - Free(skp->sk_setname); - Free(skp->sk_host); - Free(skp); - break; - } - } -} - -md_setkey_t * -dupsk(md_setkey_t *skp) -{ - md_setkey_t *tskp; - - tskp = Zalloc(sizeof (md_setkey_t)); - - *tskp = *skp; - tskp->sk_host = Strdup(skp->sk_host); - tskp->sk_setname = Strdup(skp->sk_setname); - - return (tskp); -} - -md_setkey_t * -svc_get_setkey(set_t setno) -{ - md_setkey_t *skp; - - for (skp = my_svc_sk; skp != NULL; skp = skp->sk_next) - if (setno == skp->sk_setno) - return (dupsk(skp)); - return (NULL); -} - -void -svc_set_setkey(md_setkey_t *svc_sk) -{ - md_setkey_t *skp; - - if (my_svc_sk == NULL) { - my_svc_sk = dupsk(svc_sk); - return; - } - - for (skp = my_svc_sk; skp->sk_next != NULL; skp = skp->sk_next) - assert(svc_sk->sk_setno != skp->sk_setno); - - skp->sk_next = dupsk(svc_sk); -} - -/* - * Unlock the set - * - * To unlock the set, the user must have the correct key, once this is verified - * the set is unlocked and the cached information for the set is flushed. - */ -bool_t -mdrpc_unlock_set_common( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - md_setkey_t *svc_skp; - md_set_desc *sd; - mdsetname_t *sp; - int multi_node = 0; - md_error_t xep = mdnullerror; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* - * Is diskset a MN diskset? - * Don't set error from this check since unlock set can be - * called after a set has been deleted. - */ - if (((sp = metasetnosetname(args->cl_sk->sk_setno, &xep)) != NULL) && - ((sd = metaget_setdesc(sp, &xep)) != NULL)) { - if ((MD_MNSET_DESC(sd))) { - multi_node = 1; - } - } - - /* Get the set key, if any */ - svc_skp = svc_get_setkey(args->cl_sk->sk_setno); - - /* The set is locked */ - if (svc_skp != NULL) { - - /* Make sure the opener has the right key. */ - if (args->cl_sk->sk_key.tv_sec != svc_skp->sk_key.tv_sec || - args->cl_sk->sk_key.tv_usec != svc_skp->sk_key.tv_usec) { - (void) mddserror(ep, MDE_DS_ULKSBADKEY, - svc_skp->sk_setno, mynode(), svc_skp->sk_host, - svc_skp->sk_setname); - free_sk(svc_skp); - return (TRUE); - } - - /* Unlock the set */ - del_sk(args->cl_sk->sk_setno); - - /* Cleanup */ - free_sk(svc_skp); - - goto out; - } - - - /* - * It is possible on a MN diskset to attempt to unlock a set that - * is unlocked. This could occur when the metaset or metadb command - * is failing due to another metaset or metadb command running. - * So, print no warning for MN disksets. - */ - if (multi_node == 0) { - md_eprintf("Warning: set unlocked when unlock_set called!\n"); - } - -out: - res->cl_sk = svc_get_setkey(args->cl_sk->sk_setno); - - /* Flush the set cache */ - sr_cache_flush_setno(args->cl_sk->sk_setno); - - return (TRUE); -} - -bool_t -mdrpc_unlock_set_1_svc( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_unlock_set_common(args, res, rqstp)); -} - -bool_t -mdrpc_unlock_set_2_svc( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_unlock_set_common(args, res, rqstp)); -} - -/* - * Lock the set - * - * If the user does not hand us a key, then we generate a new key and lock the - * set using this new key that was generated, if the user hands us a key then - * we use the key to lock the set. - */ -bool_t -mdrpc_lock_set_common( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - md_error_t xep = mdnullerror; - int op_mode = W_OK; - md_setkey_t *svc_skp; - md_setkey_t new_sk; - md_set_desc *sd = NULL; - mdsetname_t *sp = NULL; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - svc_skp = svc_get_setkey(args->cl_sk->sk_setno); - - /* The set is locked */ - if (svc_skp != NULL) { - - /* - * This lock request could be for a new diskset, as - * such metasetnosetname() may not return anything - * useful. Only call it if there is already a key. - */ - if ((sp = metasetnosetname(args->cl_sk->sk_setno, ep)) - != NULL) { - sd = metaget_setdesc(sp, ep); - } - - /* - * meta_lock() provides local locking for non-MN - * disksets. The local lock is held before we call - * this RPC function. We should not receive a lock - * request from the host which owns the lock. If we - * do, release the lock. - */ - if (!((sd != NULL) && (MD_MNSET_DESC(sd))) && - (strcmp(svc_skp->sk_host, args->cl_sk->sk_host) == 0)) { - md_eprintf( - "Warning: set locked when lock_set called!\n"); - - md_eprintf("Held lock info:\n"); - - md_eprintf("\tLock:\n"); - md_eprintf("\t\tSetname: %s\n", svc_skp->sk_setname); - md_eprintf("\t\tSetno: %d\n", svc_skp->sk_setno); - md_eprintf("\t\tHost: %s\n", svc_skp->sk_host); - md_eprintf("\t\tKey: %d/%d %s\n", - svc_skp->sk_key.tv_sec, svc_skp->sk_key.tv_usec, - ctime((const time_t *)&svc_skp->sk_key.tv_sec)); - - /* Unlock set */ - del_sk(svc_skp->sk_setno); - free_sk(svc_skp); - svc_skp = NULL; - - md_eprintf("Released lock held by requesting host\n"); - } - } - - /* The set is unlocked */ - if (svc_skp == NULL) { - /* If we have been given a key, use it. */ - if (args->cl_sk->sk_key.tv_sec || args->cl_sk->sk_key.tv_usec) { - svc_set_setkey(args->cl_sk); - res->cl_sk = svc_get_setkey(args->cl_sk->sk_setno); - goto out; - } - - /* We need to lock it, with a new key */ - new_sk = *args->cl_sk; - if (meta_gettimeofday(&new_sk.sk_key) == -1) { - (void) mdsyserror(ep, errno, "meta_gettimeofday()"); - mde_perror(&xep, ""); - md_exit(NULL, 1); - } - svc_set_setkey(&new_sk); - - res->cl_sk = svc_get_setkey(args->cl_sk->sk_setno); - goto out; - } - - /* - * If a MN diskset, the lock_set routine is used as a locking - * mechanism to keep multiple metaset and/or metadb commads - * from interfering with each other. If two metaset/metadb - * commands are issued at the same time - one will complete - * and the other command will fail with MDE_DS_NOTNOW_CMD. - */ - if ((sd != NULL) && MD_MNSET_DESC(sd)) { - (void) mddserror(ep, MDE_DS_NOTNOW_CMD, - svc_skp->sk_setno, mynode(), - svc_skp->sk_host, svc_skp->sk_setname); - goto out; - } - - md_eprintf("Warning: set locked when lock_set called!\n"); - - md_eprintf("Lock info:\n"); - - md_eprintf("\tLock(svc):\n"); - md_eprintf("\t\tSetname: %s\n", svc_skp->sk_setname); - md_eprintf("\t\tSetno: %d\n", svc_skp->sk_setno); - md_eprintf("\t\tHost: %s\n", svc_skp->sk_host); - md_eprintf("\t\tKey: %d/%d %s", - svc_skp->sk_key.tv_sec, svc_skp->sk_key.tv_usec, - ctime((const time_t *)&svc_skp->sk_key.tv_sec)); - - md_eprintf("\tLock(cl):\n"); - md_eprintf("\t\tSetname: %s\n", args->cl_sk->sk_setname); - md_eprintf("\t\tSetno: %d\n", args->cl_sk->sk_setno); - md_eprintf("\t\tHost: %s\n", args->cl_sk->sk_host); - md_eprintf("\t\tKey: %d/%d %s", - args->cl_sk->sk_key.tv_sec, args->cl_sk->sk_key.tv_usec, - ctime((const time_t *)&args->cl_sk->sk_key.tv_sec)); - - /* The set is locked, do we have the key? */ - if (args->cl_sk->sk_key.tv_sec == svc_skp->sk_key.tv_sec && - args->cl_sk->sk_key.tv_usec == svc_skp->sk_key.tv_usec) { - res->cl_sk = svc_get_setkey(args->cl_sk->sk_setno); - goto out; - } - - /* - * The set is locked and we do not have the key, so we set up an error. - */ - (void) mddserror(ep, MDE_DS_LKSBADKEY, svc_skp->sk_setno, mynode(), - svc_skp->sk_host, args->cl_sk->sk_setname); - -out: - if (svc_skp != NULL) - free_sk(svc_skp); - - /* Flush the set cache */ - sr_cache_flush_setno(args->cl_sk->sk_setno); - - return (TRUE); -} - -bool_t -mdrpc_lock_set_1_svc( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_lock_set_common(args, res, rqstp)); -} - -bool_t -mdrpc_lock_set_2_svc( - mdrpc_null_args *args, - mdrpc_setlock_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - return (mdrpc_lock_set_common(args, res, rqstp)); -} - -static void -updmeds( - char *setname, - md_h_arr_t *medp, - int version, /* RPC version of calling routine */ - md_error_t *ep -) -{ - mddb_userreq_t req; - md_set_record *sr; - mddb_med_parm_t mp; - - if ((sr = getsetbyname(setname, ep)) == NULL) - return; - - sr->sr_med = *medp; /* structure assignment */ - - (void) memset(&req, '\0', sizeof (req)); - - METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) - /* Do MN operation if rpc version supports it and if a MN set */ - if ((version != METAD_VERSION) && (MD_MNSET_REC(sr))) { - req.ur_size = sizeof (struct md_mnset_record); - } else { - req.ur_size = sizeof (*sr); - } - req.ur_data = (uintptr_t)sr; - if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { - (void) mdstealerror(ep, &req.ur_mde); - free_sr(sr); - return; - } - - commitset(sr, TRUE, ep); - - /* - * If a MN disket, send the mediator list to the kernel. - */ - if (MD_MNSET_REC(sr)) { - (void) memset(&mp, '\0', sizeof (mddb_med_parm_t)); - mp.med_setno = sr->sr_setno; - if (meta_h2hi(medp, &mp.med, ep)) { - free_sr(sr); - return; - } - - /* Resolve the IP addresses for the host list */ - if (meta_med_hnm2ip(&mp.med, ep)) { - free_sr(sr); - return; - } - - /* If node not yet joined to set, failure is ok. */ - if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) { - if (!mdismddberror(&mp.med_mde, MDE_DB_NOTOWNER)) { - (void) mdstealerror(ep, &mp.med_mde); - } - } - } - free_sr(sr); -} - -/* - * Update the mediator data in the set record - */ -bool_t -mdrpc_updmeds_common( - mdrpc_updmeds_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp, /* RPC stuff */ - int version /* RPC version */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - updmeds(args->sp->setname, &args->meds, version, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -bool_t -mdrpc_updmeds_1_svc( - mdrpc_updmeds_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - /* Pass RPC version (METAD_VERSION) to common routine */ - return (mdrpc_updmeds_common(args, res, rqstp, METAD_VERSION)); -} - -bool_t -mdrpc_updmeds_2_svc( - mdrpc_updmeds_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* Pass RPC version (METAD_VERSION_DEVID) to common routine */ - return (mdrpc_updmeds_common( - &args->mdrpc_updmeds_2_args_u.rev1, res, - rqstp, METAD_VERSION_DEVID)); - default: - return (FALSE); - } -} - -/* - * Call routines to suspend, reinit and resume mdcommd. - * Called during metaset and metadb command. - * NOT called during reconfig cycle. - */ -bool_t -mdrpc_mdcommdctl_2_svc( - mdrpc_mdcommdctl_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mdrpc_mdcommdctl_args *args_cc; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - int suspend_ret; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - args_cc = &(args->mdrpc_mdcommdctl_2_args_u.rev1); - switch (args_cc->flag_action) { - case COMMDCTL_SUSPEND: - suspend_ret = mdmn_suspend(args_cc->setno, - args_cc->class, 0); - if (suspend_ret != 0) { - (void) mddserror(ep, suspend_ret, - args_cc->setno, mynode(), - NULL, mynode()); - } - break; - case COMMDCTL_RESUME: - if (mdmn_resume(args_cc->setno, - args_cc->class, args_cc->flags, 0)) { - (void) mddserror(ep, - MDE_DS_COMMDCTL_RESUME_FAIL, - args_cc->setno, mynode(), - NULL, mynode()); - } - break; - case COMMDCTL_REINIT: - if (mdmn_reinit_set(args_cc->setno, 0)) { - (void) mddserror(ep, - MDE_DS_COMMDCTL_REINIT_FAIL, - args_cc->setno, mynode(), - NULL, mynode()); - } - break; - } - err = svc_fini(ep); - return (TRUE); - - default: - return (FALSE); - } -} - -/* - * Return TRUE if set is stale. - */ -bool_t -mdrpc_mn_is_stale_2_svc( - mdrpc_setno_2_args *args, - mdrpc_bool_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - mddb_config_t c; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - (void) memset(&c, 0, sizeof (c)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - c.c_id = 0; - c.c_setno = args->mdrpc_setno_2_args_u.rev1.setno; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { - (void) mdstealerror(ep, &c.c_mde); - return (TRUE); - } - - if (c.c_flags & MDDB_C_STALE) { - res->value = TRUE; - } else { - res->value = FALSE; - } - - err = svc_fini(ep); - return (TRUE); - - default: - return (FALSE); - } -} - -/* - * Clear out all clnt_locks held by all MN disksets. - * This is only used during a reconfig cycle. - */ -/* ARGSUSED */ -int -mdrpc_clr_mnsetlock_2_svc( - mdrpc_null_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - set_t max_sets, setno; - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - mdsetname_t *sp; - - /* setup, check permissions */ - (void) memset(res, 0, sizeof (*res)); - - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* - * Walk through all possible disksets. - * For each MN set, delete all keys associated with that set. - */ - if ((max_sets = get_max_sets(ep)) == 0) { - return (TRUE); - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext( - "Unable to get set %s information"), - sp->setname); - mdclrerror(ep); - continue; - } - } - - /* only check multi-node disksets */ - if (!meta_is_mn_set(sp, ep)) { - mdclrerror(ep); - continue; - } - - /* Delete keys associated with rpc.metad clnt_lock */ - del_sk(setno); - } - - *ep = mdnullerror; - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * Get drive desc on this host for given setno. - * This is only used during a reconfig cycle. - * Returns a drive desc structure for the given mdsetname - * from this host. - * - * Returned drive desc structure is partially filled in with - * the drive name but is not filled in with any other strings - * in the drivename structure. - */ -bool_t -mdrpc_getdrivedesc_2_svc( - mdrpc_sp_2_args *args, - mdrpc_getdrivedesc_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_drive_desc *dd; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - mdsetname_t *my_sp; - mdrpc_sp_args *args_r1; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - args_r1 = &args->mdrpc_sp_2_args_u.rev1; - if ((my_sp = metasetname(args_r1->sp->setname, ep)) == NULL) - return (TRUE); - - dd = metaget_drivedesc(my_sp, - (MD_BASICNAME_OK | PRINT_FAST), ep); - - res->dd = dd_list_dup(dd); - - err = svc_fini(ep); - - return (TRUE); - default: - return (FALSE); - } -} - -/* - * Update drive records given list from master during reconfig. - * Make this node's list match the master's list which may include - * deleting a drive record that is known by this node and not known - * by the master node. - * - * Sync up the set/node/drive record genids to match the genid - * passed in the dd structure (all genids in this structure - * are the same). - */ -bool_t -mdrpc_upd_dr_reconfig_common( - mdrpc_upd_dr_flags_2_args_r1 *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - mdsetname_t *local_sp; - md_set_record *sr; - md_mnset_record *mnsr; - md_drive_record *dr, *dr_placeholder = NULL; - md_drive_desc *dd; - mddrivename_t *dn, *dn1; - side_t sideno; - md_mnnode_record *nrp; - int op_mode = W_OK; - int change = 0; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if ((local_sp = metasetname(args->sp->setname, ep)) == NULL) - return (TRUE); - - metaflushsetname(local_sp); - - if ((sideno = getmyside(local_sp, ep)) == MD_SIDEWILD) - return (TRUE); - - if ((sr = getsetbyname(args->sp->setname, ep)) == NULL) - return (TRUE); - - if (!(MD_MNSET_REC(sr))) { - free_sr(sr); - return (TRUE); - } - - mnsr = (md_mnset_record *)sr; - /* Setup genid on set and node records */ - if (args->drivedescs) { - if (mnsr->sr_genid != args->drivedescs->dd_genid) { - change = 1; - mnsr->sr_genid = args->drivedescs->dd_genid; - } - nrp = mnsr->sr_nodechain; - while (nrp) { - if (nrp->nr_genid != args->drivedescs->dd_genid) { - change = 1; - nrp->nr_genid = args->drivedescs->dd_genid; - } - nrp = nrp->nr_next; - } - } - for (dr = mnsr->sr_drivechain; dr; dr = dr->dr_next) { - dn1 = metadrivename_withdrkey(local_sp, sideno, - dr->dr_key, (MD_BASICNAME_OK | PRINT_FAST), ep); - if (dn1 == NULL) - goto out; - for (dd = args->drivedescs; dd != NULL; dd = dd->dd_next) { - dn = dd->dd_dnp; - /* Found this node's drive rec to match dd */ - if (strcmp(dn->cname, dn1->cname) == 0) - break; - } - - /* - * If drive found in master's list, make slave match master. - * If drive not found in master's list, remove drive. - */ - if (dd) { - if ((dr->dr_flags != dd->dd_flags) || - (dr->dr_genid != dd->dd_genid)) { - change = 1; - dr->dr_flags = dd->dd_flags; - dr->dr_genid = dd->dd_genid; - } - } else { - /* - * Delete entry from linked list. Need to use - * dr_placeholder so that dr->dr_next points to - * the next drive record in the list. - */ - if (dr_placeholder == NULL) { - dr_placeholder = - Zalloc(sizeof (md_drive_record)); - } - dr_placeholder->dr_next = dr->dr_next; - dr_placeholder->dr_key = dr->dr_key; - sr_del_drv(sr, dr->dr_selfid); - (void) del_sideno_sidenm(dr_placeholder->dr_key, - sideno, ep); - change = 1; - dr = dr_placeholder; - } - } -out: - /* If incore records are correct, don't need to write to disk */ - if (change) { - /* Don't increment the genid in commitset */ - commitset(sr, FALSE, ep); - } - free_sr(sr); - - err = svc_fini(ep); - - if (dr_placeholder != NULL) - Free(dr_placeholder); - - return (TRUE); -} - -/* - * Version 2 routine to update this node's drive records based on - * list passed in from master node. - */ -bool_t -mdrpc_upd_dr_reconfig_2_svc( - mdrpc_upd_dr_flags_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_upd_dr_reconfig_common( - &args->mdrpc_upd_dr_flags_2_args_u.rev1, res, rqstp)); - default: - return (FALSE); - } -} - -/* - * reset mirror owner for mirrors owned by deleted - * or withdrawn host(s). Hosts being deleted or - * withdrawn are designated by nodeid since host is - * already deleted or withdrawn from set and may not - * be able to translate between a nodename and a nodeid. - * If an error occurs, ep will be set to that error information. - */ -static void -reset_mirror_owner( - char *setname, - int node_c, - int *node_id, /* Array of node ids */ - md_error_t *ep -) -{ - mdsetname_t *local_sp; - int i; - mdnamelist_t *devnlp = NULL; - mdnamelist_t *p; - mdname_t *devnp = NULL; - md_set_mmown_params_t ownpar_p; - md_set_mmown_params_t *ownpar = &ownpar_p; - char *miscname; - - if ((local_sp = metasetname(setname, ep)) == NULL) - return; - - /* get a list of all the mirrors for current set */ - if (meta_get_mirror_names(local_sp, &devnlp, 0, ep) < 0) - return; - - /* for each mirror */ - for (p = devnlp; (p != NULL); p = p->next) { - devnp = p->namep; - - /* - * we can only do these for mirrors so make sure we - * really have a mirror device and not a softpartition - * imitating one. meta_get_mirror_names seems to think - * softparts on top of a mirror are mirrors! - */ - if ((miscname = metagetmiscname(devnp, ep)) == NULL) - goto out; - if (strcmp(miscname, MD_MIRROR) != 0) - continue; - - (void) memset(ownpar, 0, sizeof (*ownpar)); - ownpar->d.mnum = meta_getminor(devnp->dev); - MD_SETDRIVERNAME(ownpar, MD_MIRROR, local_sp->setno); - - /* get the current owner id */ - if (metaioctl(MD_MN_GET_MM_OWNER, ownpar, ep, - "MD_MN_GET_MM_OWNER") != 0) { - mde_perror(ep, gettext( - "Unable to get mirror owner for %s/%s"), - local_sp->setname, - get_mdname(local_sp, ownpar->d.mnum)); - goto out; - } - - if (ownpar->d.owner == MD_MN_MIRROR_UNOWNED) { - mdclrerror(ep); - continue; - } - /* - * reset owner only if the current owner is - * in the list of nodes being deleted. - */ - for (i = 0; i < node_c; i++) { - if (ownpar->d.owner == node_id[i]) { - if (meta_mn_change_owner(&ownpar, - local_sp->setno, ownpar->d.mnum, - MD_MN_MIRROR_UNOWNED, - MD_MN_MM_ALLOW_CHANGE) == -1) { - mde_perror(ep, gettext( - "Unable to reset mirror owner for" - " %s/%s"), local_sp->setname, - get_mdname(local_sp, - ownpar->d.mnum)); - goto out; - } - break; - } - } - } - -out: - /* cleanup */ - metafreenamelist(devnlp); -} - -/* - * Wrapper routine for reset_mirror_owner. - * Called when hosts are deleted or withdrawn - * in order to reset any mirror owners that are needed. - */ -bool_t -mdrpc_reset_mirror_owner_common( - mdrpc_nodeid_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - int err; - int op_mode = W_OK; - - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - if (check_set_lock(op_mode, args->cl_sk, ep)) - return (TRUE); - - /* doit */ - reset_mirror_owner(args->sp->setname, args->nodeid.nodeid_len, - args->nodeid.nodeid_val, ep); - - err = svc_fini(ep); - - return (TRUE); -} - -/* - * RPC service routine to reset the mirror owner for mirrors owned - * by the given hosts. Typically, the list of given hosts is a list - * of nodes being deleted or withdrawn from a diskset. - * The given hosts are designated by nodeid since host may - * already be deleted or withdrawn from set and may not - * be able to translate between a nodename and a nodeid. - */ -bool_t -mdrpc_reset_mirror_owner_2_svc( - mdrpc_nodeid_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - return (mdrpc_reset_mirror_owner_common( - &args->mdrpc_nodeid_2_args_u.rev1, res, - rqstp)); - default: - return (FALSE); - } -} - -/* - * Call routines to suspend and resume I/O for the given diskset(s). - * Called during reconfig cycle. - * Diskset of 0 represents all MN disksets. - */ -bool_t -mdrpc_mn_susp_res_io_2_svc( - mdrpc_mn_susp_res_io_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mdrpc_mn_susp_res_io_args *args_sr; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - args_sr = &(args->mdrpc_mn_susp_res_io_2_args_u.rev1); - switch (args_sr->susp_res_cmd) { - case MN_SUSP_IO: - (void) (metaioctl(MD_MN_SUSPEND_SET, - &args_sr->susp_res_setno, ep, NULL)); - break; - case MN_RES_IO: - (void) (metaioctl(MD_MN_RESUME_SET, - &args_sr->susp_res_setno, ep, NULL)); - break; - } - err = svc_fini(ep); - return (TRUE); - - default: - return (FALSE); - } -} - -/* - * Resnarf a set after it has been imported - */ -bool_t -mdrpc_resnarf_set_2_svc( - mdrpc_setno_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mdrpc_setno_args *setno_args; - md_error_t *ep = &res->status; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - setno_args = &args->mdrpc_setno_2_args_u.rev1; - break; - default: - return (FALSE); - } - - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* do it */ - if (resnarf_set(setno_args->setno, ep) < 0) - return (FALSE); - - err = svc_fini(ep); - return (TRUE); -} - -/* - * Creates a resync thread. - * Always returns true. - */ -bool_t -mdrpc_mn_mirror_resync_all_2_svc( - mdrpc_setno_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - mdrpc_setno_args *setno_args; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - setno_args = &args->mdrpc_setno_2_args_u.rev1; - - /* - * Need to invoke a metasync on a node newly added to a set. - */ - (void) meta_mn_mirror_resync_all(&(setno_args->setno)); - - err = svc_fini(ep); - return (TRUE); - - default: - return (FALSE); - } -} - -/* - * Updates ABR state for all softpartitions. Calls meta_mn_sp_update_abr(), - * which forks a daemon process to perform this action. - * Always returns true. - */ -bool_t -mdrpc_mn_sp_update_abr_2_svc( - mdrpc_setno_2_args *args, - mdrpc_generic_res *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - md_error_t *ep = &res->status; - mdrpc_setno_args *setno_args; - int err; - int op_mode = R_OK; - - (void) memset(res, 0, sizeof (*res)); - switch (args->rev) { - case MD_METAD_ARGS_REV_1: - /* setup, check permissions */ - if ((err = svc_init(rqstp, op_mode, ep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - setno_args = &args->mdrpc_setno_2_args_u.rev1; - - (void) meta_mn_sp_update_abr(&(setno_args->setno)); - - err = svc_fini(ep); - return (TRUE); - - default: - return (FALSE); - } -} diff --git a/usr/src/cmd/lvm/rpc.metad/sparc/Makefile b/usr/src/cmd/lvm/rpc.metad/sparc/Makefile deleted file mode 100644 index 62c269a8c935..000000000000 --- a/usr/src/cmd/lvm/rpc.metad/sparc/Makefile +++ /dev/null @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile for logical volume management -# - -PROG= rpc.metad - -DERIVED_FILES = \ - metad_svc.c - -OBJECTS= metad_freeresult.o \ - metad_init.o \ - metad_svc_subr.o - -LINTOBJECTS= metad_freeresult.o \ - metad_init.o \ - metad_svc_subr.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -SRCS = $(OBJECTS:%.o=../%.c) -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -POFILES= $(OBJS:%.o=%.po) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta -lsocket -lnsl -ldevid - -CFLAGS += $(DEFINES) -# -# -lint := LINTFLAGS += -m - -%_svc.c := RPCGENFLAGS += -K -1 - - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJECTS) - $(LINK.c) -o $@ $(OBJECTS) $(LDLIBS) - $(POST_PROCESS) - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -catalog: - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${LINTSRCS} - -clean: - ${RM} ${OBJS} ${DERIVED_FILES} *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -metad_svc.c: $(SRC)/head/metad.x - $(CP) $(SRC)/head/metad.x . - $(RPCGEN) $(RPCGENFLAGS_SERVER) -DDEBUG metad.x -o $@ - ${RM} metad.x diff --git a/usr/src/cmd/lvm/rpc.metamedd/Makefile b/usr/src/cmd/lvm/rpc.metamedd/Makefile deleted file mode 100644 index 4dfafbef5980..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Makefile for logical volume management -# -# -# cmd/lvm/rpc.metamedd/Makefile - -MANIFEST = metamed.xml - -include ../../Makefile.cmd - -ROOTMANIFESTDIR = $(ROOTSVCNETWORKRPC) - -SUBDIRS= $(MACH) - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all install clean clobber lint: $(SUBDIRS) - -install: $(SUBDIRS) $(ROOTMANIFEST) - -check: $(CHKMANIFEST) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ diff --git a/usr/src/cmd/lvm/rpc.metamedd/Makefile.com b/usr/src/cmd/lvm/rpc.metamedd/Makefile.com deleted file mode 100644 index 08d37bdbd21f..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/Makefile.com +++ /dev/null @@ -1,107 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Architecture independent makefile for rpc.metamedd -# -# cmd/lvm/rpc.metamedd/Makefile.com - -PROG = rpc.metamedd - -RPC_DIR = $(SRC)/uts/common/sys/lvm - -RPC_OBJS = \ - meta_basic.x \ - metamed.x \ - meta_arr.x - -DERIVED_OBJS = \ - metamed_svc.o \ - metamed_xdr.o \ - meta_basic_xdr.o - -LOCAL_OBJS= \ - med_db.o \ - med_error.o \ - med_freeresult.o \ - med_hash.o \ - med_init.o \ - med_mem.o \ - med_synch.o \ - med_svc_subr.o - -LOCAL_SRCS = $(LOCAL_OBJS:%.o=../%.c) -DERIVED_SRCS = $(DERIVED_OBJS:%.o=%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -LDLIBS += -lmeta -lsocket -lnsl -LDFLAGS += $(ZINTERPOSE) - -CPPFLAGS += $(DEFINES) - -lint := LINTFLAGS += -m - -metamed_svc.c := RPCGENFLAGS += -K -1 - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - $(POST_PROCESS_O) - -all: $(PROG) - -$(PROG): $(LOCAL_OBJS) $(DERIVED_OBJS) - $(LINK.c) -o $@ $(LOCAL_OBJS) $(DERIVED_OBJS) $(LDLIBS) - $(POST_PROCESS) - -install: all $(ROOTUSRSBINPROG) - -cstyle: - $(CSTYLE) $(LOCAL_SRCS) - -lint: - $(LINT.c) $(LINTFLAGS) $(LOCAL_SRCS) - -clean: - $(RM) $(DERIVED_SRCS) $(DERIVED_OBJS) $(LOCAL_OBJS) $(RPC_OBJS) - -clobber: clean - $(RM) $(PROG) - -$(RPC_OBJS): $$(@:%=$(RPC_DIR)/%) - $(RM) $@ - $(CP) $(RPC_DIR)/$@ . - -meta_basic_xdr.c: meta_basic.x - $(RPCGEN) $(RPCGENFLAGS) -c meta_basic.x > $@ - -metamed_xdr.c: metamed.x meta_arr.x - $(RPCGEN) $(RPCGENFLAGS) -c metamed.x > $@ - -metamed_svc.c: metamed.x meta_arr.x - $(RPCGEN) $(RPCGENFLAGS_SERVER) metamed.x > $@ diff --git a/usr/src/cmd/lvm/rpc.metamedd/i386/Makefile b/usr/src/cmd/lvm/rpc.metamedd/i386/Makefile deleted file mode 100644 index 3558494b3fcd..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/i386/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996-2002 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Makefile for logical volume management -# -# cmd/lvm/rpc.metamedd/i386/Makefile - -include ../Makefile.com diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_db.c b/usr/src/cmd/lvm/rpc.metamedd/med_db.c deleted file mode 100644 index 69c1e134ff1b..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_db.c +++ /dev/null @@ -1,923 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "med_local.h" -#include "med_hash.h" -#include -#include -#include - -static int med_db_is_inited = 0; -static Cache *med_db_cache = (Cache *) NULL; -static med_rec_t *med_db_medrp = NULL; -static int med_db_nma = 0; -static int med_db_nmu = 0; -static int rec_size = roundup(sizeof (med_rec_t), DEV_BSIZE); -static char *rec_buf = NULL; -static int dbfd = -1; - -#define OP_FLAGS (O_RDWR | O_SYNC) -#define CR_FLAGS (OP_FLAGS | O_CREAT) - -#define HASHSIZE 151 -#define BSZ 4 - -#ifdef DEBUG -void -med_pr(void *keyp, int keyl, void *datap, int datal) -{ - med_med_t *medp = (med_med_t *)keyp; - int medridx = *(int *)datap; - med_rec_t *medrp = &med_db_medrp[medridx]; - - med_eprintf( - "key (%d)[keyp=0x%08x]: setno=%ld, setname=<%s>, caller=<%s>\n", - keyl, (unsigned)keyp, medp->med_setno, medp->med_setname, - medp->med_caller); - med_eprintf("data(%d)[datap=0x%x08][medrp=0x%x08]: medridx=%d\n", - datal, (unsigned)datap, (unsigned)medrp, medridx); -} -#endif /* DEBUG */ - -static int -med_hash(void *datap, int datalen, int hsz) -{ - med_med_t *medp = (med_med_t *)datap; - int i = datalen; - char *cp; - - i = 0; - cp = medp->med_setname; - while (*cp != '\0') - i += *cp++; - - cp = medp->med_caller; - while (*cp != '\0') - i += *cp++; - - i *= medp->med_setno; - - return (i % hsz); -} - -/*ARGSUSED*/ -static int -med_comp(void *datap1, void *datap2, int datalen) -{ - med_med_t *medp1 = (med_med_t *)datap1; - med_med_t *medp2 = (med_med_t *)datap2; - int ret; - - - ret = medp1->med_setno - medp2->med_setno; - - if (ret != 0) - return (ret); - - ret = strcmp(medp1->med_caller, medp2->med_caller); - - if (ret != 0) - return (ret); - - return (strcmp(medp1->med_setname, medp2->med_setname)); -} - -static void -med_kfree(void *keyp) -{ - med_med_t *medp = (med_med_t *)keyp; - - (void) Free(medp->med_caller); - (void) Free(medp->med_setname); - (void) Free(keyp); -} - -static int -add_key(med_med_t *medp, int medridx) -{ - Item *itemp; - int len; - med_med_t *tmedp; - - if (med_db_cache == (Cache *) NULL) { - len = init_cache(&med_db_cache, HASHSIZE, BSZ, med_hash, - med_comp, med_kfree, (void (*)())NULL); - if (len == -1) { - med_eprintf("add_key(): init_cache() failed.\n"); - return (-1); - } - } - - len = sizeof (med_med_t); - - if ((itemp = lookup_cache(med_db_cache, medp, len)) == Null_Item) { - if ((itemp = (Item *) Malloc(sizeof (*itemp))) == NULL) { - med_eprintf("add_key(): itemp = Malloc(%d)\n", - sizeof (*itemp)); - return (-1); - } - if ((tmedp = itemp->key = Malloc(len)) == NULL) { - med_eprintf("add_key(): itemp->key = Malloc(%d)\n", - len); - return (-1); - } - - *tmedp = *medp; /* structure assignment */ - - tmedp->med_caller = Malloc(strlen(medp->med_caller) + 1); - if (tmedp->med_caller == NULL) { - med_eprintf( - "add_key(): tmedp->med_caller = Malloc(%d)\n", - strlen(medp->med_caller) + 1); - return (-1); - } - (void) strcpy(tmedp->med_caller, medp->med_caller); - - tmedp->med_setname = Malloc(strlen(medp->med_setname) + 1); - if (tmedp->med_setname == NULL) { - med_eprintf( - "add_key(): tmedp->med_setname = Malloc(%d)\n", - strlen(medp->med_setname) + 1); - return (-1); - } - (void) strcpy(tmedp->med_setname, medp->med_setname); - - itemp->keyl = len; - - if ((itemp->data = Malloc(sizeof (int))) == NULL) { - med_eprintf("add_key(): itemp->data = Malloc(%d)\n", - sizeof (med_rec_t *)); - return (-1); - } - - *(int *)itemp->data = medridx; - - itemp->datal = sizeof (int); - - if (add_cache(med_db_cache, itemp) == -1) { - med_eprintf("add_key(): add_cache() failed.\n"); - return (-1); - } - return (0); - } - return (1); -} - -static int -del_key(med_med_t *medp) -{ - Item *itemp; - int len; - - if (med_db_cache == (Cache *) NULL) - return (0); - - len = sizeof (med_med_t); - - if ((itemp = lookup_cache(med_db_cache, medp, len)) == Null_Item) - return (0); - - (void) del_cache(med_db_cache, itemp); - - return (0); -} - -static int -find_key(med_med_t *medp) -{ - Item *itemp; - int len; - - if (med_db_cache == (Cache *) NULL) - return (-1); - - len = sizeof (med_med_t); - - if ((itemp = lookup_cache(med_db_cache, medp, len)) == Null_Item) - return (-1); - - return (*(int *)itemp->data); -} - -static int -add_db_keys(int medridx, med_err_t *medep) -{ - med_med_t med; - med_rec_t *medrp; - int i; - - medrp = &med_db_medrp[medridx]; - med.med_setno = medrp->med_rec_sn; - med.med_setname = medrp->med_rec_snm; - - for (i = 0; i < MD_MAXSIDES; i++) { - if (medrp->med_rec_nodes[i][0] == '\0') - continue; - med.med_caller = medrp->med_rec_nodes[i]; - if (add_key(&med, medridx) == -1) - return (med_error(medep, MDE_MED_DBKEYADDFAIL, - medrp->med_rec_nodes[i])); - } - - /* - * Looping through the actual list of mediator hosts - * because a mediator host may not actually be a host - * in the diskset and so access for such a host needs - * to be added. - */ - for (i = 0; i < MED_MAX_HOSTS; i++) { - if ((medrp->med_rec_meds.n_cnt > 0) && - (medrp->med_rec_meds.n_lst[i].a_cnt != 0)) { - med.med_caller = - medrp->med_rec_meds.n_lst[i].a_nm[0]; - if (add_key(&med, medridx) == -1) - return (med_error(medep, MDE_MED_DBKEYADDFAIL, - medrp->med_rec_meds.n_lst[i].a_nm[0])); - } - } - return (0); -} - -static int -del_db_keys(int medridx, med_err_t *medep) -{ - med_med_t med; - med_rec_t *medrp; - int i; - - medrp = &med_db_medrp[medridx]; - med.med_setno = medrp->med_rec_sn; - med.med_setname = medrp->med_rec_snm; - - for (i = 0; i < MD_MAXSIDES; i++) { - if (medrp->med_rec_nodes[i][0] == '\0') - continue; - med.med_caller = medrp->med_rec_nodes[i]; - if (del_key(&med) == -1) - return (med_error(medep, MDE_MED_DBKEYDELFAIL, - medrp->med_rec_nodes[i])); - } - - for (i = 0; i < MED_MAX_HOSTS; i++) { - if ((medrp->med_rec_meds.n_cnt > 0) && - (medrp->med_rec_meds.n_lst[i].a_cnt != 0)) { - med.med_caller = - medrp->med_rec_meds.n_lst[i].a_nm[0]; - if (del_key(&med) == -1) - return (med_error(medep, MDE_MED_DBKEYDELFAIL, - medrp->med_rec_meds.n_lst[i].a_nm[0])); - } - } - return (0); -} - -static int -alloc_rec_buf(med_err_t *medep) -{ - if (rec_buf == NULL) { - if ((rec_buf = Malloc(rec_size)) == NULL) - return (med_error(medep, errno, - "alloc_rec_buf: Malloc()")); - } - - (void) memset(rec_buf, '\0', rec_size); - return (0); -} - -static void -free_rec_buf(void) -{ - if (rec_buf == NULL) - return; - - Free(rec_buf); - rec_buf = NULL; -} - -static int -write_hdr( - int dbfd, - med_err_t *medep -) -{ - med_db_hdr_t dbh; - - if (alloc_rec_buf(medep)) - return (-1); - - (void) memset(&dbh, '\0', sizeof (med_db_hdr_t)); - - /* Setup the new hdr record */ - dbh.med_dbh_mag = MED_DB_MAGIC; - dbh.med_dbh_rev = MED_DB_REV; - dbh.med_dbh_nm = med_db_nmu; - - /* Checksum new header */ - crcgen(&dbh, &dbh.med_dbh_cks, sizeof (med_db_hdr_t), NULL); - - /* Position to the beginning of the file */ - if (lseek(dbfd, 0, SEEK_SET) == -1) - return (med_error(medep, errno, "write_hdr: lseek()")); - - /* Copy the header into the output buffer */ - (void) memmove(rec_buf, &dbh, sizeof (med_db_hdr_t)); - - /* Write out the header */ - if (write(dbfd, rec_buf, rec_size) == -1) - return (med_error(medep, errno, "write_hdr: write()")); - - return (0); -} - -static int -write_rec( - int dbfd, - med_rec_t *medrp, - med_err_t *medep -) -{ - uint_t save_flags = 0; - uint_t save_cks = 0; - - if (alloc_rec_buf(medep)) - return (-1); - - if (medrp->med_rec_data.med_dat_fl) { - save_flags = medrp->med_rec_data.med_dat_fl; - save_cks = medrp->med_rec_data.med_dat_cks; - medrp->med_rec_data.med_dat_fl = 0; - /* Checksum the new data */ - crcgen(&medrp->med_rec_data, &medrp->med_rec_data.med_dat_cks, - sizeof (med_data_t), NULL); - } - - /* Checksum record */ - crcgen(medrp, &medrp->med_rec_cks, sizeof (med_rec_t), NULL); - - /* Load the record into the output buffer */ - (void) memmove(rec_buf, medrp, sizeof (med_rec_t)); - - if (save_flags) { - medrp->med_rec_data.med_dat_fl = save_flags; - medrp->med_rec_data.med_dat_cks = save_cks; - /* Re-checksum the updated record */ - crcgen(medrp, &medrp->med_rec_cks, sizeof (med_rec_t), NULL); - } - - /* Write out the record */ - if (write(dbfd, rec_buf, rec_size) == -1) - return (med_error(medep, errno, "write_rec: write()")); - - return (0); -} - -static int -open_dbfile(med_err_t *medep) -{ - if (dbfd != -1) - return (0); - - /* Open the database file */ - if ((dbfd = open(MED_DB_FILE, OP_FLAGS, 0644)) == -1) { - if (errno != ENOENT) - return (med_error(medep, errno, "open_dbfile: open()")); - - if ((dbfd = open(MED_DB_FILE, CR_FLAGS, 0644)) == -1) - return (med_error(medep, errno, - "open_dbfile: open(create)")); - } - - /* Try to take an advisory lock on the file */ - if (lockf(dbfd, F_TLOCK, (off_t)0) == -1) { - (void) med_error(medep, errno, "open_dbfile: lockf(F_TLOCK)"); - medde_perror(medep, ""); - med_exit(1); - } - - return (0); -} - -static int -close_dbfile(med_err_t *medep) -{ - if (dbfd == -1) - return (0); - - /* Make sure we are at the beginning of the file */ - if (lseek(dbfd, 0, SEEK_SET) == -1) - return (med_error(medep, errno, "close_dbfile: lseek()")); - - /* Release the advisory lock on the file */ - if (lockf(dbfd, F_ULOCK, 0LL) == -1) { - (void) med_error(medep, errno, "close_dbfile: lockf(F_ULOCK)"); - medde_perror(medep, ""); - med_exit(1); - } - - if (close(dbfd) == -1) - return (med_error(medep, errno, "close_dbfile: close()")); - - dbfd = -1; - - return (0); -} - -static int -med_db_del_rec(med_med_t *medp, med_err_t *medep) -{ - med_rec_t *medrp = NULL; - int i; - int medridx = -1; - - - if (! med_db_is_inited) - return (med_error(medep, MDE_MED_DBNOTINIT, "med_db_del_rec")); - - if ((medridx = find_key(medp)) == -1) - return (0); - - /* Delete the old keys */ - if (del_db_keys(medridx, medep)) - return (-1); - - medrp = &med_db_medrp[medridx]; - - /* Mark the record in core as deleted */ - medrp->med_rec_fl |= MED_RFL_DEL; - - /* Decrement the used slot count */ - med_db_nmu--; - - /* Get ready to re-write the file */ - if (ftruncate(dbfd, 0) == -1) - return (med_error(medep, errno, "med_db_del_rec: ftruncate()")); - - if (write_hdr(dbfd, medep)) - return (-1); - - for (i = 0; i < med_db_nma; i++) { - medrp = &med_db_medrp[i]; - - if (medrp->med_rec_fl & MED_RFL_DEL) - continue; - - /* Determine our location in the file */ - if ((medrp->med_rec_foff = lseek(dbfd, 0, SEEK_CUR)) == -1) - return (med_error(medep, errno, - "med_db_del_rec: lseek()")); - - if (write_rec(dbfd, medrp, medep)) - return (-1); - } - return (0); -} - -static int -cmp_medrec(med_rec_t *omedrp, med_rec_t *nmedrp) -{ - int ret; - int i; - - if (omedrp->med_rec_mag != nmedrp->med_rec_mag) - return (0); - - if (omedrp->med_rec_rev != nmedrp->med_rec_rev) - return (0); - - /* Can't compare checksums, since the new record has no data yet */ - - /* Can't compare flags, since the in-core may have golden */ - - if (omedrp->med_rec_sn != nmedrp->med_rec_sn) - return (0); - - if (strcmp(omedrp->med_rec_snm, nmedrp->med_rec_snm) != 0) - return (0); - - for (i = 0; i < MD_MAXSIDES; i++) { - if (omedrp->med_rec_nodes[i][0] == '\0' && - nmedrp->med_rec_nodes[i][0] == '\0') - continue; - - ret = strcmp(omedrp->med_rec_nodes[i], - nmedrp->med_rec_nodes[i]); - if (ret != 0) - return (0); - } - - ret = memcmp(&omedrp->med_rec_meds, &nmedrp->med_rec_meds, - sizeof (md_h_arr_t)); - if (ret != 0) - return (0); - - return (1); -} - -/* - * Exported routines - */ - -int -med_db_init(med_err_t *medep) -{ - int i; - int err = 0; - int ret; - struct stat statb; - med_db_hdr_t *dbhp; - med_rec_t *medrp; - int nm; - off_t cur_off; - - if (med_db_is_inited) - return (0); - - if (open_dbfile(medep)) - return (-1); - - if (fstat(dbfd, &statb) == -1) - return (med_error(medep, errno, "med_db_init: fstat()")); - - /* Empty file */ - if (statb.st_size == 0) - goto out; - - /* File should be a multiple of the record size */ - if (((int)(statb.st_size % (off_t)rec_size)) != 0) - return (med_error(medep, MDE_MED_DBSZBAD, "med_db_init")); - - if (alloc_rec_buf(medep)) - return (-1); - - /* Read in the file header */ - if ((ret = read(dbfd, rec_buf, rec_size)) == -1) - return (med_error(medep, errno, "med_db_init: read(hdr)")); - - if (ret != rec_size) - return (med_error(medep, MDE_MED_DBHDRSZBAD, "med_db_init")); - - /*LINTED*/ - dbhp = (med_db_hdr_t *)rec_buf; - - /* Header magic is not OK */ - if (dbhp->med_dbh_mag != MED_DB_MAGIC) - return (med_error(medep, MDE_MED_DBHDRMAGBAD, "med_db_init")); - - /* Header revision is not OK */ - if (dbhp->med_dbh_rev != MED_DB_REV) - return (med_error(medep, MDE_MED_DBHDRREVBAD, "med_db_init")); - - /* Header checksum is not OK */ - if (crcchk(dbhp, &dbhp->med_dbh_cks, sizeof (med_db_hdr_t), NULL)) - return (med_error(medep, MDE_MED_DBHDRCKSBAD, "med_db_init")); - - /* File size does not add up */ - if (((off_t)((dbhp->med_dbh_nm * rec_size) + rec_size)) - != statb.st_size) - return (med_error(medep, MDE_MED_DBSZBAD, "med_db_init")); - - if ((nm = dbhp->med_dbh_nm) > 0) { - /* Allocate space to hold the records to be read next */ - med_db_medrp = (med_rec_t *)Calloc(nm, sizeof (med_rec_t)); - if (med_db_medrp == NULL) - return (med_error(medep, errno, - "med_db_init: Calloc(med_db_medrp)")); - } - - /* Read in all the records */ - for (i = 0; i < nm; i++) { - if ((cur_off = lseek(dbfd, 0, SEEK_CUR)) == -1) { - err = med_error(medep, errno, - "med_db_init: lseek()"); - goto out; - } - - (void) memset(rec_buf, '\0', rec_size); - - if ((ret = read(dbfd, rec_buf, rec_size)) == -1) { - err = med_error(medep, errno, - "med_db_init: read() rec"); - goto out; - } - - if (ret != rec_size) { - err = med_error(medep, MDE_MED_DBRECSZBAD, - "med_db_init"); - goto out; - } - - /*LINTED*/ - medrp = (med_rec_t *)rec_buf; - - /* Record magic is not OK */ - if (medrp->med_rec_mag != MED_REC_MAGIC) { - err = med_error(medep, MDE_MED_DBRECMAGBAD, - "med_db_init"); - goto out; - } - - /* Record revision is not OK */ - if (medrp->med_rec_rev != MED_REC_REV) { - err = med_error(medep, MDE_MED_DBRECREVBAD, - "med_db_init"); - goto out; - } - - /* Record checksum is not OK */ - ret = crcchk(medrp, &medrp->med_rec_cks, sizeof (med_rec_t), - NULL); - if (ret) { - err = med_error(medep, MDE_MED_DBRECCKSBAD, - "med_db_init"); - goto out; - } - - /* Record is not where it is supposed to be */ - if (medrp->med_rec_foff != cur_off) { - err = med_error(medep, MDE_MED_DBRECOFFBAD, - "med_db_init"); - goto out; - } - - med_db_medrp[i] = *medrp; /* structure assignment */ - } - - /* Add the keys to access this record */ - for (i = 0; i < nm; i++) - if ((err = add_db_keys(i, medep)) == -1) - goto out; - - med_db_nma = nm; - med_db_nmu = nm; - -out: - if (err && med_db_medrp != NULL) - Free(med_db_medrp); - - if (!err) - med_db_is_inited = 1; - - return (err); -} - -med_rec_t * -med_db_get_rec(med_med_t *medp, med_err_t *medep) -{ - int medridx = -1; - - if ((medridx = find_key(medp)) == -1) { - (void) med_error(medep, MDE_MED_DBRECNOENT, "med_db_get_rec"); - return (NULL); - } - - return (&med_db_medrp[medridx]); -} - -med_data_t * -med_db_get_data(med_med_t *medp, med_err_t *medep) -{ - int medridx = -1; - - if ((medridx = find_key(medp)) == -1) { - (void) med_error(medep, MDE_MED_DBRECNOENT, "med_db_get_data"); - return (NULL); - } - - return (&med_db_medrp[medridx].med_rec_data); -} - -int -med_db_put_rec(med_med_t *medp, med_rec_t *nmedrp, med_err_t *medep) -{ - med_rec_t *medrp = NULL; - med_rec_t *tmedrp = NULL; - int i; - int found = 0; - int medridx = -1; - - - if (! med_db_is_inited) - return (med_error(medep, MDE_MED_DBNOTINIT, "med_db_put_rec")); - - if (medp->med_setno != nmedrp->med_rec_sn) - return (med_error(medep, MDE_MED_DBARGSMISMATCH, - "med_db_put_rec")); - - /* See if we are still considered a mediator - is this a delete? */ - for (i = 0; i < MED_MAX_HOSTS; i++) { - if (nmedrp->med_rec_meds.n_lst[i].a_cnt == 0) - continue; - - if (strcmp(nmedrp->med_rec_meds.n_lst[i].a_nm[0], - mynode()) == 0) { - found = 1; - break; - } - } - - /* If it is a delete, make it happen */ - if (! found) - return (med_db_del_rec(medp, medep)); - - /* See if there is an existing record */ - if ((medridx = find_key(medp)) != -1) { - - medrp = &med_db_medrp[medridx]; - - /* Delete the old keys */ - if (del_db_keys(medridx, medep)) - return (-1); - - /* Decrement the used slot count */ - med_db_nmu--; - } else { - for (i = 0; i < MED_MAX_HOSTS; i++) { - med_med_t tmed; - - if (nmedrp->med_rec_meds.n_lst[i].a_cnt == 0) - continue; - - if (strcmp(nmedrp->med_rec_meds.n_lst[i].a_nm[0], - medp->med_caller) == 0) - continue; - - tmed = *medp; /* structure assignment */ - - tmed.med_caller = - Strdup(nmedrp->med_rec_meds.n_lst[i].a_nm[0]); - - medridx = find_key(&tmed); - - Free(tmed.med_caller); - - if (medridx != -1) { - medrp = &med_db_medrp[medridx]; - - if (cmp_medrec(medrp, nmedrp)) - return (0); - } - } - } - - /* Allocate more space if needed */ - if ((med_db_nmu + 1) > med_db_nma) { - - /* Allocate more space to hold the new record */ - tmedrp = (med_rec_t *)Calloc((med_db_nmu + 1), - sizeof (med_rec_t)); - if (tmedrp == NULL) - return (med_error(medep, errno, - "med_db_put_rec: Re-Calloc(tmedrp)")); - - /* Copy the existing information into the new area */ - for (i = 0; i < med_db_nma; i++) - tmedrp[i] = med_db_medrp[i]; /* structure assignment */ - - med_db_nmu++; - med_db_nma = med_db_nmu; - - if (med_db_medrp) - Free(med_db_medrp); - - med_db_medrp = tmedrp; - - medridx = med_db_nma - 1; - - /* Initialize */ - medrp = &med_db_medrp[medridx]; - medrp->med_rec_mag = MED_REC_MAGIC; - medrp->med_rec_rev = MED_REC_REV; - medrp->med_rec_sn = nmedrp->med_rec_sn; - (void) strcpy(medrp->med_rec_snm, nmedrp->med_rec_snm); - - /* Calculate the record offset */ - medrp->med_rec_foff = (off_t)(((med_db_nma - 1) * rec_size) + - rec_size); - } else { - /* - * We did not find the record, but have space allocated. - * Find an empty slot. - */ - if (medrp == NULL) { - for (i = 0; i < med_db_nma; i++) { - medrp = &med_db_medrp[i]; - - if (! (medrp->med_rec_fl & MED_RFL_DEL)) - continue; - - medridx = i; - - /* Mark as no longer deleted */ - medrp->med_rec_fl &= ~MED_RFL_DEL; - - /* Initialize */ - medrp->med_rec_mag = MED_REC_MAGIC; - medrp->med_rec_rev = MED_REC_REV; - medrp->med_rec_sn = nmedrp->med_rec_sn; - (void) strcpy(medrp->med_rec_snm, - nmedrp->med_rec_snm); - - /* Calculate the new offset of the record */ - medrp->med_rec_foff = (off_t) - ((med_db_nmu * rec_size) + rec_size); - - /* Clear the old data */ - (void) memset(&medrp->med_rec_data, '\0', - sizeof (med_data_t)); - - break; - } - } - med_db_nmu++; - } - - assert(medridx != -1); - - /* Update the record with the new information */ - medrp->med_rec_meds = nmedrp->med_rec_meds; /* structure assignment */ - - for (i = 0; i < MD_MAXSIDES; i++) - (void) strcpy(medrp->med_rec_nodes[i], - nmedrp->med_rec_nodes[i]); - - if (write_hdr(dbfd, medep)) - return (-1); - - /* Position to record location */ - if (lseek(dbfd, medrp->med_rec_foff, SEEK_SET) == -1) - return (med_error(medep, errno, "med_db_put_rec: lseek(rec)")); - - if (write_rec(dbfd, medrp, medep)) - return (-1); - - /* Add the keys for this record */ - if (add_db_keys(medridx, medep)) - return (-1); - - return (0); -} - -int -med_db_put_data(med_med_t *medp, med_data_t *meddp, med_err_t *medep) -{ - med_rec_t *medrp = NULL; - int medridx = -1; - - - if (! med_db_is_inited) - return (med_error(medep, MDE_MED_DBNOTINIT, "med_db_put_data")); - - if (medp->med_setno != meddp->med_dat_sn) - return (med_error(medep, MDE_MED_DBARGSMISMATCH, - "med_db_put_data")); - - if ((medridx = find_key(medp)) == -1) - return (med_error(medep, MDE_MED_DBRECNOENT, - "med_db_put_data")); - - medrp = &med_db_medrp[medridx]; - - medrp->med_rec_data = *meddp; /* structure assignment */ - - /* Go to location of the record */ - if (lseek(dbfd, medrp->med_rec_foff, SEEK_SET) == -1) - return (med_error(medep, errno, "med_db_put_data: lseek()")); - - if (write_rec(dbfd, medrp, medep)) - return (-1); - - return (0); -} - -int -med_db_finit(med_err_t *medep) -{ - des_cache(&med_db_cache); - Free(med_db_medrp); - free_rec_buf(); - if (close_dbfile(medep)) - return (-1); - return (0); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_error.c b/usr/src/cmd/lvm/rpc.metamedd/med_error.c deleted file mode 100644 index 84cf44f3a98c..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_error.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "med_local.h" - -#include - -/* - * debug stuff - */ -#define MED_DEBUG 0 -#ifdef MED_DEBUG -static int med_debug = MED_DEBUG; -#endif - -/* - * free and clear error - */ -static void -med_clrerror( - med_err_t *medep -) -{ - if (medep->med_node != NULL) - Free(medep->med_node); - if (medep->med_misc != NULL) - Free(medep->med_misc); - (void) memset(medep, 0, sizeof (*medep)); -} - -/* - * Exported Entry Points - */ - -/* - * setup error - */ -int -med_error( - med_err_t *medep, - int errnum, - char *misc -) -{ - med_clrerror(medep); - if (errnum != 0) { - medep->med_errno = errnum; - if (med_debug && misc != NULL) - medep->med_misc = Strdup(misc); - medep->med_node = Strdup(mynode()); - return (-1); - } - return (0); -} - -/* - * med_err_t to string - */ -static char * -med_strerror( - med_err_t *medep -) -{ - static char buf[1024]; - char *p = buf; - char *emsg; - - if (medep->med_errno < 0) { - if ((emsg = med_errnum_to_str(medep->med_errno)) != NULL) - return (emsg); - (void) sprintf(p, - "unknown mediator errno %d\n", medep->med_errno); - return (buf); - } else { - if ((emsg = strerror(medep->med_errno)) != NULL) - return (emsg); - (void) sprintf(p, - "errno %d out of range", medep->med_errno); - return (buf); - } -} - -/* - * printf-like log - */ -static void -med_vprintf( - const char *fmt, - va_list ap -) -{ - if (isatty(fileno(stderr))) { -#ifdef _REENTRANT - static mutex_t stderr_mx = DEFAULTMUTEX; - - med_mx_lock(&stderr_mx); -#endif /* _REENTRANT */ - (void) vfprintf(stderr, fmt, ap); - (void) fflush(stderr); - (void) fsync(fileno(stderr)); -#ifdef _REENTRANT - med_mx_unlock(&stderr_mx); -#endif /* _REENTRANT */ - } - vsyslog(LOG_ERR, fmt, ap); -} - -/*PRINTFLIKE1*/ -void -med_eprintf( - const char *fmt, - ... -) -{ - va_list ap; - - va_start(ap, fmt); - med_vprintf(fmt, ap); - va_end(ap); -} - -/* - * printf-like perror() log - */ -/*PRINTFLIKE2*/ -static void -med_vperror( - med_err_t *medep, - const char *fmt, - va_list ap -) -{ - char buf[1024]; - char *p = buf; - size_t len = sizeof (buf); - int n; - - if ((medep->med_node != NULL) && (medep->med_node[0] != '\0')) { - n = snprintf(p, len, "%s: ", medep->med_node); - p += n; - len -= n; - } - if ((medep->med_misc != NULL) && (medep->med_misc[0] != '\0')) { - n = snprintf(p, len, "%s: ", medep->med_misc); - p += n; - len -= n; - } - if ((fmt != NULL) && (*fmt != '\0')) { - n = vsnprintf(p, len, fmt, ap); - p += n; - len -= n; - n = snprintf(p, len, ": "); - p += n; - len -= n; - } - (void) snprintf(p, len, "%s", med_strerror(medep)); - med_eprintf("%s\n", buf); -} - -/*PRINTFLIKE2*/ -void -medde_perror( - med_err_t *medep, - const char *fmt, - ... -) -{ - va_list ap; - - va_start(ap, fmt); - med_vperror(medep, fmt, ap); - va_end(ap); -} - -/*PRINTFLIKE1*/ -void -med_perror( - const char *fmt, - ... -) -{ - va_list ap; - med_err_t status = med_null_err; - - (void) med_error(&status, errno, NULL); - va_start(ap, fmt); - med_vperror(&status, fmt, ap); - va_end(ap); - med_clrerror(&status); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_freeresult.c b/usr/src/cmd/lvm/rpc.metamedd/med_freeresult.c deleted file mode 100644 index bb4a441d80b6..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_freeresult.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "med_local.h" - -/*ARGSUSED*/ -bool_t -med_prog_1_freeresult( - SVCXPRT *unused, - xdrproc_t xdr_result, - caddr_t result -) -{ - xdr_free(xdr_result, result); - return (TRUE); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_hash.c b/usr/src/cmd/lvm/rpc.metamedd/med_hash.c deleted file mode 100644 index d0d4b05590c7..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_hash.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include "med_hash.h" -#include "med_local.h" - -#ifdef _KERNEL -#define memmove(a, b, c) bcopy(b, a, c) -#define memcmp bcmp -#define memset(a, '\0', c) bzero(a, c) -#define Malloc bkmem_alloc -#endif /* _KERNEL */ - -#define VERIFY_HASH_REALLOC - -static int -BCMP(void *str1, void *str2, int len) -{ - return (memcmp((char *)str1, (char *)str2, len)); -} - -static int -HASH(void *datap, int datalen, int hsz) -{ - char *cp; - int hv = 0; - - for (cp = (char *)datap; cp != ((char *)datap + datalen); hv += *cp++) - ; - return (hv % hsz); -} - -int -init_cache( - Cache **cp, - int hsz, - int bsz, - int (*hfunc)(void *, int, int), - int (*cfunc)(void *, void *, int), - void (*kffunc)(void *), - void (*dffunc)(void *) -) -{ - int i; - - if ((*cp = (Cache *) Malloc(sizeof (**cp))) == NULL) { - (void) fprintf(stderr, "Malloc(Cache **cp)"); - return (-1); - } - (*cp)->bp = (Bucket *) Malloc(sizeof (*(*cp)->bp) * hsz); - if ((*cp)->bp == NULL) { - (void) fprintf(stderr, "Malloc(Bucket cp->bp)"); - return (-1); - } - (*cp)->hsz = hsz; - (*cp)->bsz = bsz; - for (i = 0; i < (*cp)->hsz; i++) { - (*cp)->bp[i].nent = 0; - (*cp)->bp[i].nalloc = 0; - (*cp)->bp[i].itempp = NULL; - } - /* Hash function */ - if (hfunc != (int (*)()) NULL) - (*cp)->hfunc = hfunc; - else - (*cp)->hfunc = HASH; - - /* Compare function */ - if (cfunc != (int (*)()) NULL) - (*cp)->cfunc = cfunc; - else - (*cp)->cfunc = BCMP; - - /* Key free function */ - if (kffunc != (void (*)()) NULL) - (*cp)->kffunc = kffunc; - else - (*cp)->kffunc = Free; - - /* Data free function */ - if (dffunc != (void (*)()) NULL) - (*cp)->dffunc = dffunc; - else - (*cp)->dffunc = Free; - - return (0); -} - -int -add_cache(Cache *cp, Item *itemp) -{ - Bucket *bp; - Item **titempp; - - if (cp == NULL) { - (void) fprintf(stderr, - "add_cache(): init_cache() not called.\n"); - return (-1); - } - - bp = &cp->bp[(*cp->hfunc)(itemp->key, itemp->keyl, cp->hsz)]; - if (bp->nent >= bp->nalloc) { - if (bp->nalloc == 0) { - bp->itempp = - (Item **) Malloc(sizeof (*bp->itempp) * cp->bsz); - } else { -#ifdef VERIFY_HASH_REALLOC - (void) fprintf(stderr, - "realloc(%d) bucket=%d\n", bp->nalloc + cp->bsz, - (*cp->hfunc)(itemp->key, itemp->keyl, cp->hsz)); -#endif /* VERIFY_HASH_REALLOC */ - titempp = - (Item **) Malloc(sizeof (*bp->itempp) * - (bp->nalloc + cp->bsz)); - if (titempp != NULL) { - (void) memmove((char *)titempp, - (char *)bp->itempp, - (sizeof (*bp->itempp) * bp->nalloc)); -#ifdef _KERNEL - bkmem_free(bp->itempp, - (sizeof (*bp->itempp) * bp->nalloc)); -#else /* !_KERNEL */ - Free(bp->itempp); -#endif /* _KERNEL */ - bp->itempp = titempp; - } else - bp->itempp = NULL; - } - if (bp->itempp == NULL) { - (void) fprintf(stderr, - "add_cache(): out of memory\n"); - return (-1); - } - bp->nalloc += cp->bsz; - } - bp->itempp[bp->nent] = itemp; - bp->nent++; - return (0); -} - -Item * -lookup_cache(Cache *cp, void *datap, int datalen) -{ - int i; - Bucket *bp; - - if (cp == NULL) { - (void) fprintf(stderr, - "lookup_cache(): init_cache() not called.\n"); - return (Null_Item); - } - - bp = &cp->bp[(*cp->hfunc)(datap, datalen, cp->hsz)]; - for (i = 0; i < bp->nent; i++) - if (!(*cp->cfunc)((void *)bp->itempp[i]->key, datap, datalen)) - return (bp->itempp[i]); - return (Null_Item); -} - -Item * -first_item(Cache *cp, int *bidx, int *iidx) -{ - Item *itemp = Null_Item; - - if (cp == NULL) { - (void) fprintf(stderr, - "first_item(): init_cache() not called.\n"); - return (Null_Item); - } - - for (*bidx = 0; *bidx < cp->hsz && (cp->bp[*bidx].nalloc == 0 || - cp->bp[*bidx].nent == 0); (*bidx)++) - /* void */; - - if (*bidx < cp->hsz && cp->bp[*bidx].nent > 0) { - itemp = cp->bp[*bidx].itempp[0]; - *iidx = 0; - } else { - *bidx = -1; - *iidx = -1; - } - return (itemp); -} - -Item * -next_item(Cache *cp, int *bidx, int *iidx) -{ - Item *itemp = Null_Item; - - if (cp == NULL) { - (void) fprintf(stderr, - "next_item(): init_cache() not called.\n"); - return (Null_Item); - } - - if (*bidx < cp->hsz && *bidx >= 0) { - if ((*iidx + 1) < cp->bp[*bidx].nent) { - itemp = cp->bp[*bidx].itempp[++(*iidx)]; - } else { - for (++(*bidx); - *bidx < cp->hsz && (cp->bp[*bidx].nalloc == 0 || - cp->bp[*bidx].nent == 0); - (*bidx)++) - /* void */; - if (*bidx < cp->hsz && cp->bp[*bidx].nent > 0) { - *iidx = 0; - itemp = cp->bp[*bidx].itempp[(*iidx)++]; - } else { - *bidx = -1; - *iidx = -1; - } - } - } else { - *bidx = -1; - *iidx = -1; - } - return (itemp); -} - -void -des_cache(Cache **cpp) -{ - Cache *cp = *cpp; - Bucket *bp; - Item *itemp; - int i; - int j; - - if (cp == NULL) { - (void) fprintf(stderr, - "des_cache(): init_cache() not called.\n"); - return; - } - - for (i = 0; i < cp->hsz; i++) { - bp = &cp->bp[i]; - if (bp->nalloc > 0) { - for (j = 0; j < bp->nent; j++) { - itemp = bp->itempp[j]; - if (itemp->key) - (void) (*cp->kffunc)(itemp->key); - if (itemp->data) - (void) (*cp->dffunc)(itemp->data); - } - } - (void) Free(bp->itempp); - } - (void) Free(cp->bp); - (void) Free(cp); - *cpp = NULL; -} - -int -del_cache(Cache *cp, Item *itemp) -{ - Bucket *bp; - int bidx; - int iidx; - int tidx; - int retval = 0; - void *datap = itemp->key; - int datalen = itemp->keyl; - Item *titemp; - - if (cp == NULL) { - (void) fprintf(stderr, - "del_cache(): init_cache() not called.\n"); - return (-1); - } - - bidx = (*cp->hfunc)(datap, datalen, cp->hsz); - bp = &cp->bp[bidx]; - - for (iidx = 0; iidx < bp->nent; iidx++) - if (!(*cp->cfunc)((void *)bp->itempp[iidx]->key, datap, - datalen)) { - titemp = bp->itempp[iidx]; - break; - } - if (iidx < bp->nent) { - if (titemp->key) - (void) (*cp->kffunc)(titemp->key); - if (titemp->data) - (void) (*cp->dffunc)(titemp->data); - titemp->keyl = 0; - titemp->datal = 0; - bp->nent--; - if (bp->nent == 0) { - (void) Free(bp->itempp); - bp->itempp = NULL; - bp->nalloc = 0; - } else { - for (tidx = iidx + 1; tidx < (bp->nent + 1); tidx++) { - bp->itempp[iidx] = bp->itempp[tidx]; - iidx = tidx; - } - } - } else { - (void) fprintf(stderr, - "del_cache(): item not found.\n"); - retval = -1; - } - return (retval); -} - -#ifdef DEBUG -void -cache_stat(Cache *cp, char *tag) -{ - Bucket *bp; - int bidx; - - if (cp == NULL) { - (void) fprintf(stderr, - "cache_stat(): init_cache() not called.\n"); - return; - } - - if (tag && *tag) - (void) printf("%s", tag); - - for (bidx = 0; bidx < cp->hsz; bidx++) { - bp = &cp->bp[bidx]; - if (bp->nalloc > 0) { - (void) printf("Bucket #%d Alloc %d", bidx, bp->nalloc); - if (bp->nent > 0) { - (void) printf( - " Entries %d Reallocs %d", bp->nent, - (bp->nalloc / cp->hsz)); - (void) printf( - " Utilization %d%%", - ((bp->nent * 100)/bp->nalloc)); - } - (void) printf("\n"); - (void) fflush(stdout); - } - } -} - -void -pr_cache(Cache *cp, char *tag, void (*pfunc)(void *, int, void *, int)) -{ - int bidx; - int iidx; - Bucket *bp; - Item *itemp; - - if (cp == NULL) { - (void) fprintf(stderr, - "pr_cache(): init_cache() not called.\n"); - return; - } - - if (tag && *tag) - (void) printf("%s", tag); - - for (bidx = 0; bidx < cp->hsz; bidx++) { - bp = &cp->bp[bidx]; - if (bp->nent > 0) - for (iidx = 0; iidx < bp->nent; iidx++) { - itemp = bp->itempp[iidx]; - (*pfunc)(itemp->key, itemp->keyl, - itemp->data, itemp->datal); - } - } -} -#endif /* DEBUG */ diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_hash.h b/usr/src/cmd/lvm/rpc.metamedd/med_hash.h deleted file mode 100644 index e378fa154d7c..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_hash.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#ifndef _MED_HASH_H -#define _MED_HASH_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct item_t { - void *key; - int keyl; - void *data; - int datal; -} Item; - -#define Null_Item ((Item *) NULL) - -typedef struct bucket_t { - int nent; - int nalloc; - Item **itempp; -} Bucket; - -typedef struct cache_t { - int hsz; - int bsz; - Bucket *bp; - int (*hfunc)(void *, int, int); - int (*cfunc)(void *, void *, int); - void (*kffunc)(void *); - void (*dffunc)(void *); -} Cache; - -#ifdef _KERNEL -#define malloc bkmem_alloc -#endif /* _KERNEL */ - -extern int init_cache(Cache **cp, int hsz, int bsz, - int (*hfunc)(void *, int, int), - int (*cfunc)(void *, void *, int), - void (*kffunc)(void *), void (*dffunc)(void *)); -extern int add_cache(Cache *cp, Item *itemp); -extern Item *lookup_cache(Cache *cp, void *datap, int datalen); -extern Item *first_item(Cache *cp, int *bidx, int *iidx); -extern Item *next_item(Cache *cp, int *bidx, int *iidx); -extern void des_cache(Cache **cpp); -extern int del_cache(Cache *cp, Item *itemp); -extern void cache_stat(Cache *cp, char *tag); -extern void pr_cache(Cache *cp, char *tag, - void (*pfunc)(void *, int, void *, int)); - -#ifdef __cplusplus -} -#endif - -#endif /* _MED_HASH_H */ diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_init.c b/usr/src/cmd/lvm/rpc.metamedd/med_init.c deleted file mode 100644 index bc76420d8679..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_init.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 1999-2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "med_local.h" -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern void nc_perror(const char *msg); - -/* daemon name */ -static char *medname = MED_SERVNAME; - -/* - * reset and exit daemon - */ -void -med_exit( - int eval -) -{ - med_err_t status = med_null_err; - - if (med_db_finit(&status)) - medde_perror(&status, "med_db_finit"); - - /* log exit */ - med_eprintf("exiting with %d\n", eval); - - /* exit with value */ - exit(eval); -} - -/* - * signal catchers - */ -static void -med_catcher( - int sig -) -{ - char buf[128]; - char *msg; - - /* log signal */ - if ((msg = strsignal(sig)) == NULL) { - (void) sprintf(buf, "unknown signal %d", sig); - msg = buf; - } - med_eprintf("%s\n", msg); - - /* let default handler do it's thing */ - (void) sigset(sig, SIG_DFL); - if (kill(getpid(), sig) != 0) { - med_perror("kill(getpid())"); - med_exit(-sig); - } -} - -/* - * initialize daemon - */ -static int -med_setup( - med_err_t *medep -) -{ - /* catch common signals */ - if ((sigset(SIGHUP, med_catcher) == SIG_ERR) || - (sigset(SIGINT, med_catcher) == SIG_ERR) || - (sigset(SIGABRT, med_catcher) == SIG_ERR) || - (sigset(SIGBUS, med_catcher) == SIG_ERR) || - (sigset(SIGSEGV, med_catcher) == SIG_ERR) || - (sigset(SIGPIPE, med_catcher) == SIG_ERR) || - (sigset(SIGTERM, med_catcher) == SIG_ERR)) { - return (med_error(medep, errno, "sigset")); - } - - /* ignore SIGALRM (used in med_cv_timedwait) */ - if (sigset(SIGALRM, SIG_IGN) == SIG_ERR) { - return (med_error(medep, errno, "sigset")); - } - - /* return success */ - return (0); -} - -/* - * (re)initalize daemon - */ -static int -med_init_daemon( - med_err_t *medep -) -{ - static int already = 0; - - /* setup */ - if (! already) { - if (med_setup(medep) != 0) - return (-1); - openlog(medname, LOG_CONS, LOG_DAEMON); - already = 1; - } - - /* return success */ - return (0); -} - -/* - * get my nodename - */ -char * -mynode(void) -{ - static struct utsname myuname; - static int done = 0; - - if (! done) { - if (uname(&myuname) == -1) { - med_perror("uname"); - assert(0); - } - done = 1; - } - return (myuname.nodename); -} - -/* - * check for trusted host and user - */ -static int -check_host( - struct svc_req *rqstp /* RPC stuff */ -) -{ - struct authsys_parms *sys_credp; - SVCXPRT *transp = rqstp->rq_xprt; - struct netconfig *nconfp = NULL; - struct nd_hostservlist *hservlistp = NULL; - int i; - int rval = -1; - char *inplace = NULL; - - /* check for root */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - assert(sys_credp != NULL); - if (sys_credp->aup_uid != 0) - goto out; - - /* get hostnames */ - if (transp->xp_netid == NULL) { - med_eprintf("transp->xp_netid == NULL\n"); - goto out; - } - if ((nconfp = getnetconfigent(transp->xp_netid)) == NULL) { -#ifdef DEBUG - nc_perror("getnetconfigent(transp->xp_netid)"); -#endif - goto out; - } - if ((__netdir_getbyaddr_nosrv(nconfp, &hservlistp, &transp->xp_rtaddr) - != 0) || (hservlistp == NULL)) { -#ifdef DEBUG - netdir_perror("netdir_getbyaddr(transp->xp_rtaddr)"); -#endif - goto out; - } - - /* check hostnames */ - for (i = 0; (i < hservlistp->h_cnt); ++i) { - struct nd_hostserv *hservp = &hservlistp->h_hostservs[i]; - char *hostname = hservp->h_host; - - inplace = strdup(hostname); - sdssc_cm_nm2nid(inplace); - if (strcmp(inplace, hostname)) { - - /* - * If the names are now different it indicates - * that hostname was converted to a nodeid. This - * will only occur if hostname is part of the same - * cluster that the current node is in. - * If the machine is not running in a cluster than - * sdssc_cm_nm2nid is a noop which leaves inplace - * alone. - */ - rval = 0; - goto out; - } - - /* localhost is OK */ - if (strcmp(hostname, mynode()) == 0) { - rval = 0; - goto out; - } - - if (strcmp(hostname, "localhost") == 0) { - rval = 0; - goto out; - } - - /* check for remote root access */ - if (ruserok(hostname, 1, "root", "root") == 0) { - rval = 0; - goto out; - } - } - - /* cleanup, return success */ -out: - if (inplace) - free(inplace); - if (hservlistp != NULL) - netdir_free(hservlistp, ND_HOSTSERVLIST); - if (nconfp != NULL) - Free(nconfp); - return (rval); -} - -/* - * check for user in local group 14 - */ -static int -check_gid14( - uid_t uid -) -{ - struct passwd *pwp; - struct group *grp; - char **namep; - - /* get user info, check default GID */ - if ((pwp = getpwuid(uid)) == NULL) - return (-1); - if (pwp->pw_gid == MED_GID) - return (0); - - /* check in group */ - if ((grp = getgrgid(MED_GID)) == NULL) - return (-1); - for (namep = grp->gr_mem; ((*namep != NULL) && (**namep != '\0')); - ++namep) { - if (strcmp(*namep, pwp->pw_name) == 0) - return (0); - } - return (-1); -} - -/* - * check AUTH_SYS - */ -static int -check_sys( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - med_err_t *medep /* returned status */ -) -{ -#ifdef _REENTRANT - static mutex_t mx = DEFAULTMUTEX; -#endif /* _REENTRANT */ - struct authsys_parms *sys_credp; - - /* for read, anything is OK */ - if (! (amode & W_OK)) - return (0); - -#ifdef _REENTRANT - /* single thread (not really needed if daemon stays single threaded) */ - mutex_lock(&mx); -#endif /* _REENTRANT */ - - /* check for remote root or METAMED_GID */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - if ((check_gid14(sys_credp->aup_uid) == 0) || - (check_host(rqstp) == 0)) { -#ifdef _REENTRANT - mutex_unlock(&mx); -#endif /* _REENTRANT */ - return (0); - } - - /* return failure */ -#ifdef _REENTRANT - mutex_unlock(&mx); -#endif /* _REENTRANT */ - return (med_error(medep, EACCES, medname)); -} - -/* - * setup RPC service - * - * if can't authenticate return < 0 - * if any other error return > 0 - */ -int -med_init( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - med_err_t *medep /* returned status */ -) -{ - SVCXPRT *transp = rqstp->rq_xprt; - - /* - * initialize - */ - (void) memset(medep, 0, sizeof (*medep)); - - if (sdssc_bind_library() == SDSSC_ERROR) { - (void) med_error(medep, EACCES, - "can't bind to cluster library"); - return (1); - } - - /* - * check credentials - */ - switch (rqstp->rq_cred.oa_flavor) { - - /* UNIX flavor */ - case AUTH_SYS: - { - if (check_sys(rqstp, amode, medep) != 0) - return (1); /* error */ - break; - } - - /* can't authenticate anything else */ - default: - svcerr_weakauth(transp); - return (-1); /* weak authentication */ - - } - - /* - * (re)initialize - */ - if (med_init_daemon(medep) != 0) - return (1); /* error */ - - /* return success */ - return (0); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_local.h b/usr/src/cmd/lvm/rpc.metamedd/med_local.h deleted file mode 100644 index fca533c1015b..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_local.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _MED_LOCAL_H -#define _MED_LOCAL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _REENTRANT -/* - * millisecond time - */ -typedef u_longlong_t med_msec_t; -#endif /* _REENTRANT */ - -/* - * extern functions - */ -/* med_error.c */ -extern int med_error(med_err_t *medep, int errnum, char *name); -/*PRINTFLIKE2*/ -extern void medde_perror(med_err_t *medep, const char *fmt, ...); -/*PRINTFLIKE1*/ -extern void med_perror(const char *fmt, ...); -/*PRINTFLIKE1*/ -extern void med_eprintf(const char *fmt, ...); - -/* med_init.c */ -extern void med_exit(int eval); -extern int med_init(struct svc_req *rqstp, int amode, - med_err_t *medep); -extern char *mynode(void); - -/* med_mem.c */ -extern void *Malloc(size_t s); -extern void *Zalloc(size_t s); -extern void *Realloc(void *p, size_t s); -extern void *Calloc(size_t n, size_t s); -extern char *Strdup(char *p); -extern void Free(void *p); - -/* meta_metad.c */ -#ifdef _REENTRANT -/* med_synch.c */ -extern void med_cv_init(cond_t *cvp); -extern void med_cv_destroy(cond_t *cvp); -extern void med_cv_wait(cond_t *cvp, mutex_t *mp); -extern void med_cv_timedwait(cond_t *cvp, mutex_t *mp, - med_msec_t to); -extern void med_cv_broadcast(cond_t *cvp); -extern void med_mx_init(mutex_t *mp); -extern void med_mx_destroy(mutex_t *mp); -extern void med_mx_lock(mutex_t *mp); -extern void med_mx_unlock(mutex_t *mp); -extern void med_rw_rdlock(rwlock_t *rwlp); -extern void med_rw_wrlock(rwlock_t *rwlp); -extern void med_rw_unlock(rwlock_t *rwlp); -#endif /* _REENTRANT */ - -/* med_db.c */ -extern int med_db_init(med_err_t *medep); -extern med_rec_t *med_db_get_rec(med_med_t *medp, med_err_t *medep); -extern med_data_t *med_db_get_data(med_med_t *medp, med_err_t *medep); -extern int med_db_put_rec(med_med_t *medp, med_rec_t *nmedrp, - med_err_t *medep); -extern int med_db_put_data(med_med_t *medp, med_data_t *meddp, - med_err_t *medep); -extern int med_db_finit(med_err_t *medep); - -#ifdef __cplusplus -} -#endif - -#endif /* _MED_LOCAL_H */ diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_mem.c b/usr/src/cmd/lvm/rpc.metamedd/med_mem.c deleted file mode 100644 index b52cc4c375ae..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_mem.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#include "med_local.h" - -/* - * free - */ -void -Free( - void *p -) -{ - free(p); -} - -/* - * malloc - */ -void * -Malloc( - size_t s -) -{ - void *mem; - - if ((mem = malloc(s)) == NULL) { - med_perror(""); - med_exit(1); - } - return (mem); -} - -/* - * zalloc - */ -void * -Zalloc( - size_t s -) -{ - return (memset(Malloc(s), 0, s)); -} - -/* - * realloc - */ -void * -Realloc( - void *p, - size_t s -) -{ - if ((p = realloc(p, s)) == NULL) { - med_perror(""); - med_exit(1); - } - return (p); -} - -/* - * calloc - */ -void * -Calloc( - size_t n, - size_t s -) -{ - unsigned long total; - - if (n == 0 || s == 0) { - total = 0; - } else { - total = (unsigned long)n * s; - /* check for overflow */ - if (total / n != s) - return (NULL); - } - return (Zalloc(total)); -} - -/* - * strdup - */ -char * -Strdup( - char *p -) -{ - if ((p = strdup(p)) == NULL) { - med_perror(""); - med_exit(1); - } - return (p); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_svc_subr.c b/usr/src/cmd/lvm/rpc.metamedd/med_svc_subr.c deleted file mode 100644 index d9d1f33ec321..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_svc_subr.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1993, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "med_local.h" - -/* - * return a response - */ -/*ARGSUSED*/ -bool_t -med_null_1_svc( - void *argp, - med_err_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - /* Initialization */ - *res = med_null_err; - - /* do nothing */ - return (TRUE); -} - -/* - * Update the mediator data file. - */ -/*ARGSUSED*/ -bool_t -med_upd_data_1_svc( - med_upd_data_args_t *argp, - med_err_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int err; - - /* Initialization */ - *res = med_null_err; - - /* setup, check permissions */ - if ((err = med_init(rqstp, W_OK, res)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - if (med_db_init(res)) - goto out; - - (void) med_db_put_data(&argp->med, &argp->med_data, res); - -out: - return (TRUE); -} - -/* - * Get the mediator data - */ -/*ARGSUSED*/ -bool_t -med_get_data_1_svc( - med_args_t *argp, - med_get_data_res_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int err; - med_data_t *meddp; - med_err_t *medep = &res->med_status; - - /* Initialization */ - (void) memset(res, 0, sizeof (*res)); - *medep = med_null_err; - - /* setup, check permissions */ - if ((err = med_init(rqstp, R_OK, medep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - if (med_db_init(medep)) - goto out; - - if ((meddp = med_db_get_data(&argp->med, medep)) == NULL) - goto out; - - res->med_data = *meddp; /* structure assignment */ - -out: - return (TRUE); -} - -/* - * Update the mediator record. - */ -/*ARGSUSED*/ -bool_t -med_upd_rec_1_svc( - med_upd_rec_args_t *argp, - med_err_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int err; - - /* Initialization */ - *res = med_null_err; - - /* setup, check permissions */ - if ((err = med_init(rqstp, W_OK, res)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - if (med_db_init(res)) - goto out; - - (void) med_db_put_rec(&argp->med, &argp->med_rec, res); - -out: - return (TRUE); -} - -/* - * Get the mediator record - */ -/*ARGSUSED*/ -bool_t -med_get_rec_1_svc( - med_args_t *argp, - med_get_rec_res_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - med_rec_t *medrp; - int err; - med_err_t *medep = &res->med_status; - - /* Initialization */ - (void) memset(res, 0, sizeof (*res)); - *medep = med_null_err; - - /* setup, check permissions */ - if ((err = med_init(rqstp, R_OK, medep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - if (med_db_init(medep)) - goto out; - - if ((medrp = med_db_get_rec(&argp->med, medep)) == NULL) - goto out; - - res->med_rec = *medrp; /* structure assignment */ - -out: - return (TRUE); -} - -/* - * return the official host name for the callee - */ -/*ARGSUSED*/ -bool_t -med_hostname_1_svc( - void *argp, - med_hnm_res_t *res, - struct svc_req *rqstp /* RPC stuff */ -) -{ - med_err_t *medep = &res->med_status; - int err; - - /* Initialization */ - (void) memset(res, 0, sizeof (*res)); - *medep = med_null_err; - - /* setup, check permissions */ - if ((err = med_init(rqstp, R_OK, medep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - res->med_hnm = Strdup(mynode()); - - return (TRUE); -} diff --git a/usr/src/cmd/lvm/rpc.metamedd/med_synch.c b/usr/src/cmd/lvm/rpc.metamedd/med_synch.c deleted file mode 100644 index aad764d2036f..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/med_synch.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "med_local.h" - -#ifdef _REENTRANT -/* - * manipulate conditional variables, handle errors - */ -void -med_cv_init( - cond_t *cvp -) -{ - if (cond_init(cvp, USYNC_THREAD, NULL) != 0) { - med_perror("cond_init"); - med_exit(1); - } -} - -void -med_cv_destroy( - cond_t *cvp -) -{ - if (cond_destroy(cvp) != 0) { - med_perror("cond_destroy"); - med_exit(1); - } -} - -void -med_cv_wait( - cond_t *cvp, - mutex_t *mp -) -{ - int err; - - assert(MUTEX_HELD(mp)); - if (((err = cond_wait(cvp, mp)) != 0) && - (err != EINTR)) { - errno = err; - med_perror("cond_wait"); - med_exit(1); - } -} - -void -med_cv_timedwait( - cond_t *cvp, - mutex_t *mp, - med_msec_t to -) -{ - struct itimerval new, old; - int err; - - /* check lock */ - assert(MUTEX_HELD(mp)); - assert(to != 0); - - /* set timer */ - new.it_interval.tv_sec = 0; - new.it_interval.tv_usec = 0; - new.it_value.tv_sec = to / 1000; - new.it_value.tv_usec = (to % 1000) * 1000; - if (setitimer(ITIMER_REAL, &new, &old) != 0) { - med_perror("cond_wait"); - med_exit(1); - } - - /* wait for condition or timeout */ - if (((err = cond_wait(cvp, mp)) != 0) && - (err != EINTR)) { - errno = err; - med_perror("cond_wait"); - med_exit(1); - } - - /* reset timer */ - if (err != EINTR) { - new.it_interval.tv_sec = 0; - new.it_interval.tv_usec = 0; - new.it_value.tv_sec = 0; - new.it_value.tv_usec = 0; - if (setitimer(ITIMER_REAL, &new, &old) != 0) { - med_perror("cond_wait"); - med_exit(1); - } - } -} - -void -med_cv_broadcast( - cond_t *cvp -) -{ - if (cond_broadcast(cvp) != 0) { - med_perror("cond_broadcast"); - med_exit(1); - } -} - -/* - * manipulate mutexs, handle errors - */ -void -med_mx_init( - mutex_t *mp -) -{ - if (mutex_init(mp, USYNC_THREAD, NULL) != 0) { - med_perror("mutex_init"); - med_exit(1); - } -} - -void -med_mx_destroy( - mutex_t *mp -) -{ - if (mutex_destroy(mp) != 0) { - med_perror("mutex_destroy"); - med_exit(1); - } -} - -void -med_mx_lock( - mutex_t *mp -) -{ - if (mutex_lock(mp) != 0) { - med_perror("mutex_lock"); - med_exit(1); - } -} - -void -med_mx_unlock( - mutex_t *mp -) -{ - assert(MUTEX_HELD(mp)); - if (mutex_unlock(mp) != 0) { - med_perror("mutex_unlock"); - med_exit(1); - } -} - -/* - * manipulate rwlockss, handle errors - */ -void -med_rw_rdlock( - rwlock_t *rwlp -) -{ - if (rw_rdlock(rwlp) != 0) { - med_perror("rw_rdlock"); - med_exit(1); - } -} - -void -med_rw_wrlock( - rwlock_t *rwlp -) -{ - if (rw_wrlock(rwlp) != 0) { - med_perror("rw_wrlock"); - med_exit(1); - } -} - -void -med_rw_unlock( - rwlock_t *rwlp -) -{ - if (rw_unlock(rwlp) != 0) { - med_perror("rw_unlock"); - med_exit(1); - } -} -#endif /* _REENTRANT */ diff --git a/usr/src/cmd/lvm/rpc.metamedd/metamed.xml b/usr/src/cmd/lvm/rpc.metamedd/metamed.xml deleted file mode 100644 index 8fc3a6c530fb..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/metamed.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/rpc.metamedd/sparc/Makefile b/usr/src/cmd/lvm/rpc.metamedd/sparc/Makefile deleted file mode 100644 index 72bbe3bd4278..000000000000 --- a/usr/src/cmd/lvm/rpc.metamedd/sparc/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996-2002 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Makefile for logical volume management -# -# -# cmd/lvm/rpc.metamedd/sparc/Makefile - -include ../Makefile.com diff --git a/usr/src/cmd/lvm/rpc.metamhd/Makefile b/usr/src/cmd/lvm/rpc.metamhd/Makefile deleted file mode 100644 index d67620219c7e..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/Makefile +++ /dev/null @@ -1,68 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Makefile for logical volume management -# -# -# cmd/lvm/rpc.metamhd/Makefile - -PROG= rpc.metamhd -MANIFEST= metamh.xml - -RPCMOD = metamhd - -include ../../Makefile.cmd -include ../Makefile.lvm - -ROOTMANIFESTDIR= $(ROOTSVCNETWORKRPC) - -SUBDIRS= $(MACH) - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all: $(SUBDIRS) - -clean: $(SUBDIRS) - $(RM) metamhd_svc.c metamhd_xdr.c mhdx_xdr.c - -clobber lint: $(SUBDIRS) - -install: $(SUBDIRS) $(ROOTMANIFEST) - -check: $(CHKMANIFEST) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ diff --git a/usr/src/cmd/lvm/rpc.metamhd/ff.h b/usr/src/cmd/lvm/rpc.metamhd/ff.h deleted file mode 100644 index 9f3e2a8e0832..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/ff.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1989-1996, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -/* - * ff.h - Failfast device driver header file. - */ - -#ifndef _FF_H -#define _FF_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Supported ioctl calls. - */ -#define FAILFAST_BASE ('f' << 8) - -#define FAILFAST_ARM (FAILFAST_BASE|1) -#define FAILFAST_DISARM (FAILFAST_BASE|2) -#define FAILFAST_DEBUG_MODE (FAILFAST_BASE|3) -#define FAILFAST_HALT_MODE (FAILFAST_BASE|4) -#define FAILFAST_PANIC_MODE (FAILFAST_BASE|5) -#define FAILFAST_SETNAME (FAILFAST_BASE|6) - -#ifdef __cplusplus -} -#endif - -#endif /* !_FF_H */ diff --git a/usr/src/cmd/lvm/rpc.metamhd/i386/Makefile b/usr/src/cmd/lvm/rpc.metamhd/i386/Makefile deleted file mode 100644 index 1eec3121798f..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/i386/Makefile +++ /dev/null @@ -1,128 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996, 2001-2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright 2015 Igor Kozhukhov -# -# -# Makefile for logical volume management -# - -PROG= rpc.metamhd - -RPCMOD = metamhd - -DERIVED_FILES = \ - $(RPCMOD)_svc.c \ - $(RPCMOD)_xdr.c \ - mhdx_xdr.c - -OBJECTS= \ - mhd_drive.o \ - mhd_error.o \ - mhd_failfast.o \ - mhd_freeresult.o \ - mhd_init.o \ - mhd_mem.o \ - mhd_metamhd.o \ - mhd_set.o \ - mhd_synch.o \ - mhd_time.o - -LINTOBJECTS= \ - mhd_drive.o \ - mhd_error.o \ - mhd_failfast.o \ - mhd_freeresult.o \ - mhd_init.o \ - mhd_mem.o \ - mhd_metamhd.o \ - mhd_set.o \ - mhd_synch.o \ - mhd_time.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -SRCS = $(OBJECTS:%.o=../%.c) -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - - -MDLIBS = -LDLIBS += -ladm -lsocket -lnsl - -POFILES= $(OBJECTS:%.o=%.po) - - -$(RPCMOD)_svc.c := RPCGENFLAGS += -A -K -1 - -DEFINES += -D_REENTRANT -CFLAGS += $(DEFINES) - - -# -# -lint := LINTFLAGS += -m - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJECTS) - $(LINK.c) -o $@ $(OBJECTS) $(LDLIBS) - $(POST_PROCESS) - - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -catalog: - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${LINTSRCS} - -clean: - ${RM} ${OBJECTS} ${DERIVED_FILES} *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -metamhd_svc.c: $(SRC)/head/metamhd.x - $(CP) $(SRC)/head/metamhd.x . - $(RPCGEN) $(RPCGENFLAGS_SERVER) metamhd.x -o $@ - ${RM} metamhd.x - -metamhd_xdr.c: $(SRC)/head/metamhd.x - $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metamhd.x -o $@ - -mhdx_xdr.c: $(SRC)/uts/common/sys/lvm/mhdx.x - $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/mhdx.x | \ - $(AWK) '{sub(/uts\/common\/sys\/lvm/, "head") ; print $$0}' >$@ diff --git a/usr/src/cmd/lvm/rpc.metamhd/metamh.xml b/usr/src/cmd/lvm/rpc.metamhd/metamh.xml deleted file mode 100644 index 952a59064d8d..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/metamh.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_drive.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_drive.c deleted file mode 100644 index a8d9b661343f..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_drive.c +++ /dev/null @@ -1,1274 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "mhd_local.h" - -#include -#include -#include -#include -#include -#include - -/* - * manipulate drives - */ - -/* - * null list constant - */ -const mhd_drive_list_t mhd_null_list = MHD_NULL_LIST; - -/* - * add drive to list - */ -void -mhd_add_drive( - mhd_drive_list_t *dlp, - mhd_drive_t *dp -) -{ - /* add drive to list */ - if (dlp->dl_ndrive >= dlp->dl_alloc) { - dlp->dl_alloc += 10; - dlp->dl_drives = Realloc(dlp->dl_drives, - (dlp->dl_alloc * sizeof (*dlp->dl_drives))); - } - dlp->dl_drives[dlp->dl_ndrive++] = dp; -} - -/* - * delete drive from list - */ -void -mhd_del_drive( - mhd_drive_list_t *dlp, - mhd_drive_t *dp -) -{ - uint_t i; - - /* delete drive from list */ - for (i = 0; (i < dlp->dl_ndrive); ++i) { - if (dlp->dl_drives[i] == dp) - break; - } - assert(dlp->dl_drives[i] == dp); - for (/* void */; (i < dlp->dl_ndrive); ++i) - dlp->dl_drives[i] = dlp->dl_drives[i + 1]; - dlp->dl_ndrive--; -} - -/* - * free drive list - */ -void -mhd_free_list( - mhd_drive_list_t *dlp -) -{ - if (dlp->dl_drives != NULL) - Free(dlp->dl_drives); - (void) memset(dlp, 0, sizeof (*dlp)); -} - -/* - * manipulate drive state - */ -int -mhd_state( - mhd_drive_t *dp, - mhd_state_t new_state, - mhd_error_t *mhep -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - mhd_state_t old_state = dp->dr_state; - - /* check lock */ - assert(MUTEX_HELD(&sp->sr_mx)); - - /* set state and kick thread */ - MHDPRINTF2(("%s: state 0x%x now 0x%x\n", - dp->dr_rname, dp->dr_state, new_state)); - dp->dr_state = new_state; - mhd_cv_broadcast(&dp->dr_cv); - - /* if this is the last PROBING drive, disable any failfast */ - if ((old_state & DRIVE_PROBING) && (! (new_state & DRIVE_PROBING))) { - mhd_drive_list_t *dlp = &sp->sr_drives; - uint_t cnt, i; - - for (cnt = 0, i = 0; (i < dlp->dl_ndrive); ++i) { - if (dlp->dl_drives[i]->dr_state & DRIVE_PROBING) - ++cnt; - } - if (cnt == 0) { - mhd_error_t status = mhd_null_error; - - if (mhep == NULL) - mhep = &status; - if (mhd_ff_disarm(sp, mhep) != 0) { - if (mhep == &status) { - mhde_perror(mhep, dp->dr_rname); - mhd_clrerror(mhep); - } - return (-1); - } - } - } - - /* return success */ - return (0); -} - -int -mhd_state_set( - mhd_drive_t *dp, - mhd_state_t new_state, - mhd_error_t *mhep -) -{ - return (mhd_state(dp, (dp->dr_state | new_state), mhep)); -} - -static int -mhd_state_clr( - mhd_drive_t *dp, - mhd_state_t new_state, - mhd_error_t *mhep -) -{ - return (mhd_state(dp, (dp->dr_state & ~new_state), mhep)); -} - -/* - * idle a drive - */ -int -mhd_idle( - mhd_drive_t *dp, - mhd_error_t *mhep -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - - /* check lock */ - assert(MUTEX_HELD(&sp->sr_mx)); - - /* wait for thread to idle */ - for (;;) { - if (DRIVE_IS_IDLE(dp)) - return (0); - if (mhd_state(dp, DRIVE_IDLING, mhep) != 0) - return (-1); - (void) mhd_cv_wait(&sp->sr_cv, &sp->sr_mx); - } -} - -/* - * reserve the drive - */ -static int -mhd_reserve( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - int serial = (sp->sr_options & MHD_SERIAL); - mhd_mhioctkown_t *tkp = &sp->sr_timeouts.mh_tk; - struct mhioctkown tkown; - int err; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_fd >= 0); - assert(dp->dr_state == DRIVE_RESERVING); - - /* setup timeouts */ - (void) memset(&tkown, 0, sizeof (tkown)); - tkown.reinstate_resv_delay = tkp->reinstate_resv_delay; - tkown.min_ownership_delay = tkp->min_ownership_delay; - tkown.max_ownership_delay = tkp->max_ownership_delay; - - /* reserve drive */ - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - err = ioctl(dp->dr_fd, MHIOCTKOWN, &tkown); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err != 0) { - mhd_perror("%s: MHIOCTKOWN", dp->dr_rname); - (void) mhd_state(dp, DRIVE_ERRORED, NULL); - dp->dr_errnum = errno; - return (-1); - } - - /* return success */ - MHDPRINTF(("%s: MHIOCTKOWN: succeeded\n", dp->dr_rname)); - (void) mhd_state(dp, DRIVE_IDLE, NULL); - return (0); -} - -/* - * failfast the drive - */ -static int -mhd_failfast( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - int serial = (sp->sr_options & MHD_SERIAL); - int ff = sp->sr_timeouts.mh_ff; - char *release = ((ff == 0) ? " (release)" : ""); - int err; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_fd >= 0); - assert(dp->dr_state == DRIVE_FAILFASTING); - - /* failfast drive */ - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - err = ioctl(dp->dr_fd, MHIOCENFAILFAST, &ff); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err != 0) { - mhd_perror("%s: MHIOCENFAILFAST%s", dp->dr_rname, release); - (void) mhd_state(dp, DRIVE_ERRORED, NULL); - dp->dr_errnum = errno; - return (-1); - } - - /* return success */ - MHDPRINTF(("%s: MHIOCENFAILFAST%s: succeeded\n", - dp->dr_rname, release)); - (void) mhd_state(dp, DRIVE_IDLE, NULL); - return (0); -} - -/* - * release the drive - */ -static int -mhd_release( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - int serial = (sp->sr_options & MHD_SERIAL); - int ff = 0; /* disable failfast */ - int err; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_fd >= 0); - assert(dp->dr_state == DRIVE_RELEASING); - - /* disable failfast */ - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - err = ioctl(dp->dr_fd, MHIOCENFAILFAST, &ff); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err != 0) { - mhd_perror("%s: MHIOCENFAILFAST (release)", dp->dr_rname); - (void) mhd_state(dp, DRIVE_ERRORED, NULL); - dp->dr_errnum = errno; - return (-1); - } - MHDPRINTF(("%s: MHIOCENFAILFAST (release): succeeded\n", - dp->dr_rname)); - - /* release drive */ - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - err = ioctl(dp->dr_fd, MHIOCRELEASE, NULL); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err != 0) { - mhd_perror("%s: MHIOCRELEASE", dp->dr_rname); - (void) mhd_state(dp, DRIVE_ERRORED, NULL); - dp->dr_errnum = errno; - return (-1); - } - - /* return success */ - MHDPRINTF(("%s: MHIOCRELEASE: succeeded\n", dp->dr_rname)); - (void) mhd_state(dp, DRIVE_IDLE, NULL); - return (0); -} - -/* - * probe the drive - */ -static int -mhd_probe( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - int serial = (sp->sr_options & MHD_SERIAL); - int err; - mhd_msec_t now; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_fd >= 0); - assert(dp->dr_state & (DRIVE_PROBING | DRIVE_STATUSING)); - - /* get status (we may get dumped from PROBING here) */ - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - err = ioctl(dp->dr_fd, MHIOCSTATUS, NULL); - now = mhd_time(); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (! (dp->dr_state & (DRIVE_PROBING | DRIVE_STATUSING))) - return (0); - - /* update status */ - if (dp->dr_state & DRIVE_STATUSING) { - if (err == 1) { - MHDPRINTF(("%s: MHIOCSTATUS: reserved\n", - dp->dr_rname)); - dp->dr_errnum = MHD_E_RESERVED; - } else if (err != 0) { - mhd_perror("%s: MHIOCSTATUS", dp->dr_rname); - dp->dr_errnum = errno; - } else { - MHDPRINTF(("%s: MHIOCSTATUS: available\n", - dp->dr_rname)); - dp->dr_errnum = 0; - } - (void) mhd_state_clr(dp, DRIVE_STATUSING, NULL); - } - - /* update time or die */ - if (dp->dr_state & DRIVE_PROBING) { - /* check our drive */ - if (err == 0) { - dp->dr_time = now; - } else if (err == 1) { - mhd_eprintf("%s: %s: reservation conflict\n", - sp->sr_name, dp->dr_rname); - mhd_ff_die(sp); - } - - /* check other drives */ - mhd_ff_check(sp); - } - - /* return success */ - return (0); -} - -/* - * cached controller map - */ -typedef struct { - char *regexpr1; - uint_t tray; - uint_t bus; - char *regexpr2; - char *scan; -} mhd_ctlrmap_t; - -static rwlock_t ctlr_rw = DEFAULTRWLOCK; -static time_t ctlr_mtime = 0; -static size_t ctlr_num = 0; -static mhd_ctlrmap_t *ctlr_map = NULL; - -/* - * free up controller map - */ -static void -free_map() -{ - size_t i; - - assert(RW_WRITE_HELD(&ctlr_rw)); - - for (i = 0; (i < ctlr_num); ++i) { - mhd_ctlrmap_t *cmp = &ctlr_map[i]; - - if (cmp->regexpr1 != NULL) - Free(cmp->regexpr1); - if (cmp->regexpr2 != NULL) - Free(cmp->regexpr2); - if (cmp->scan != NULL) - Free(cmp->scan); - } - if (ctlr_map != NULL) - Free(ctlr_map); - ctlr_num = 0; - ctlr_map = NULL; -} - -/* - * unlock controller map - */ -static void -unlock_map() -{ - assert(RW_WRITE_HELD(&ctlr_rw) | RW_READ_HELD(&ctlr_rw)); - - mhd_rw_unlock(&ctlr_rw); -} - -/* - * update controller map and lock it - */ -static int -update_map() -{ - struct stat statbuf; - FILE *fp; - char line[256], expr1[256], expr2[256], scan[256]; - unsigned tray, bus; - int rval = -1; - - /* see if map file has changed */ - mhd_rw_rdlock(&ctlr_rw); - if (stat(METACTLRMAP, &statbuf) != 0) { - mhd_perror(METACTLRMAP); - goto out; - } - if (statbuf.st_mtime == ctlr_mtime) { - rval = 0; - goto out; - } - - /* trade up to writer lock, check again */ - mhd_rw_unlock(&ctlr_rw); - mhd_rw_wrlock(&ctlr_rw); - if (statbuf.st_mtime == ctlr_mtime) { - rval = 0; - goto out; - } - if (ctlr_mtime != 0) - mhd_eprintf("updating controller map\n"); - ctlr_mtime = statbuf.st_mtime; - - /* toss existing cache */ - free_map(); - - /* parse md.ctlrmap */ - if ((fp = fopen(METACTLRMAP, "r")) == NULL) { - mhd_perror(METACTLRMAP); - goto out; - } - clearerr(fp); - while (fgets(line, sizeof (line), fp) != NULL) { - char *regexpr1 = NULL; - char *regexpr2 = NULL; - mhd_ctlrmap_t *cmp; - - /* skip blank lines and comments */ - if ((line[0] == '\0') || (line[0] == '\n') || (line[0] == '#')) - continue; - - /* parse line */ - if (((sscanf(line, "\"%[^\"]\" %u %u \"%[^\"]\" \"%[^\"]\"", - expr1, &tray, &bus, expr2, scan)) != 5) || - ((regexpr1 = regcmp(expr1, 0)) == NULL) || - ((regexpr2 = regcmp(expr2, 0)) == NULL)) { - mhd_eprintf("%s: bad regex(es) '%s'\n", - METACTLRMAP, line); - if (regexpr1 != NULL) - Free(regexpr1); - if (regexpr2 != NULL) - Free(regexpr2); - continue; - } - - /* add to cache */ - ctlr_map = Realloc(ctlr_map, - ((ctlr_num + 1) * sizeof (*ctlr_map))); - cmp = &ctlr_map[ctlr_num++]; - cmp->regexpr1 = regexpr1; - cmp->tray = tray; - cmp->bus = bus; - cmp->regexpr2 = regexpr2; - cmp->scan = Strdup(scan); - } - if (ferror(fp)) { - mhd_perror(METACTLRMAP); - (void) fclose(fp); - goto out; - } - if (fclose(fp) != 0) { - mhd_perror(METACTLRMAP); - goto out; - } - - /* success */ - rval = 0; - - /* return success */ -out: - if (rval != 0) { - mhd_rw_unlock(&ctlr_rw); - return (-1); - } - return (0); -} - -static char * -get_pln_ctlr_name( - char *path -) -{ - char *devicesname, *p; - char retval[MAXPATHLEN]; - - devicesname = Strdup(path); - if ((p = strrchr(devicesname, '/')) == NULL) { - Free(devicesname); - return (NULL); - } - - /* strip off the "ssd@..." portion of the devices name */ - *p = '\0'; - - /* strip off the "../../" in front of "devices" */ - if ((p = strstr(devicesname, "/devices/")) == NULL) { - Free(devicesname); - return (NULL); - } - - (void) snprintf(retval, sizeof (retval), "%s:ctlr", p); - Free(devicesname); - return (Strdup(retval)); -} - -struct pln_cache { - char *pln_name; - enum mhd_ctlrtype_t ctype; - struct pln_cache *next; -}; - -static struct pln_cache *pln_cache_anchor = NULL; -static mutex_t mhd_pln_mx = DEFAULTMUTEX; - -/* singled threaded by caller */ -static void -add_pln_cache( - char *pln_name, - enum mhd_ctlrtype_t ctype - -) -{ - struct pln_cache *p; - - p = Malloc(sizeof (*p)); - - p->pln_name = pln_name; - p->ctype = ctype; - p->next = pln_cache_anchor; - pln_cache_anchor = p; -} - -/* singled threaded by caller */ -static int -find_pln_cache( - char *pln_name, - enum mhd_ctlrtype_t *ctype_ret -) -{ - struct pln_cache *p; - - for (p = pln_cache_anchor; p != NULL; p = p->next) { - if (strcmp(pln_name, p->pln_name) == 0) { - *ctype_ret = p->ctype; - return (1); - } - } - return (0); -} - -static void -free_pln_cache(void) -{ - struct pln_cache *p, *n = NULL; - - (void) mutex_lock(&mhd_pln_mx); - for (p = pln_cache_anchor; p != NULL; p = n) { - n = p->next; - Free(p->pln_name); - Free(p); - } - - pln_cache_anchor = NULL; - (void) mutex_unlock(&mhd_pln_mx); -} - -/* - * match on SSA Model 200. - */ -static void -match_SSA200( - mhd_drive_t *dp, - char *path -) -{ - mhd_cinfo_t *cinfop = &dp->dr_drive_id.did_cinfo; - struct uscsi_cmd ucmd; - union scsi_cdb cdb; - struct scsi_inquiry inq; - int fd; - char *pln_ctlr_name; - enum mhd_ctlrtype_t ctype; - char *p; - - if ((pln_ctlr_name = get_pln_ctlr_name(path)) == NULL) - return; - - (void) mutex_lock(&mhd_pln_mx); - if (find_pln_cache(pln_ctlr_name, &ctype) == 1) { - (void) mutex_unlock(&mhd_pln_mx); - if (ctype != MHD_CTLR_SSA200) - return; - - /* over-ride for SSA200 */ - cinfop->mhc_ctype = ctype; - cinfop->mhc_tray = cinfop->mhc_bus; - return; - } - - if ((fd = open(pln_ctlr_name, (O_RDONLY|O_NDELAY), 0)) < 0) { - (void) mutex_unlock(&mhd_pln_mx); - Free(pln_ctlr_name); - return; - } - - (void) memset(&ucmd, 0, sizeof (ucmd)); - (void) memset(&cdb, 0, sizeof (cdb)); - (void) memset(&inq, 0, sizeof (inq)); - cdb.scc_cmd = SCMD_INQUIRY; - cdb.g0_count0 = sizeof (inq); - ucmd.uscsi_cdb = (caddr_t)&cdb; - ucmd.uscsi_cdblen = CDB_GROUP0; - ucmd.uscsi_bufaddr = (caddr_t)&inq; - ucmd.uscsi_buflen = sizeof (inq); - ucmd.uscsi_flags = USCSI_READ | USCSI_ISOLATE | USCSI_DIAGNOSE; - ucmd.uscsi_timeout = 30; - if (ioctl(fd, USCSICMD, &ucmd)) { - (void) mutex_unlock(&mhd_pln_mx); - (void) close(fd); - MHDPRINTF(("%s: USCSICMD(SCMD_INQUIRY): failed errno %d\n", - pln_ctlr_name, errno)); - Free(pln_ctlr_name); - return; - } - - (void) close(fd); - MHDPRINTF(("%s: USCSICMD(SCMD_INQUIRY): success\n", pln_ctlr_name)); - - /* Make all trailing spaces be null char */ - for (p = inq.inq_pid + sizeof (inq.inq_pid) - 1; p != inq.inq_pid; - p--) { - if (*p == '\0') - continue; - if (!isspace(*p)) - break; - *p = '\0'; - } - - if (strncmp(inq.inq_pid, META_SSA200_PID, sizeof (inq.inq_pid)) != 0) - goto out; - - /* over-ride the ctype, and tray */ - cinfop->mhc_ctype = MHD_CTLR_SSA200; - cinfop->mhc_tray = cinfop->mhc_bus; - -out: - add_pln_cache(pln_ctlr_name, cinfop->mhc_ctype); - (void) mutex_unlock(&mhd_pln_mx); -} - -/* - * get controller info - */ -static void -match_SSA100( - mhd_drive_t *dp, - char *path -) -{ - mhd_cinfo_t *cinfop = &dp->dr_drive_id.did_cinfo; - uint_t i; - char *p; - lloff_t wwn; - const char *fmt; - - /* update and lock controller map */ - if (update_map() != 0) - return; /* give up */ - assert(RW_WRITE_HELD(&ctlr_rw) || RW_READ_HELD(&ctlr_rw)); - - /* look for match in cache */ - for (i = 0; (i < ctlr_num); ++i) { - mhd_ctlrmap_t *cmp = &ctlr_map[i]; - - fmt = cmp->scan; - if ((regex(cmp->regexpr1, path) != NULL) && - ((p = regex(cmp->regexpr2, path)) != NULL) && - (sscanf(p, fmt, - (ulong_t *)&wwn._p._u, (ulong_t *)&wwn._p._l) == 2)) { - cinfop->mhc_ctype = MHD_CTLR_SSA100; - cinfop->mhc_tray = cmp->tray; - cinfop->mhc_bus = cmp->bus; - cinfop->mhc_wwn = wwn._f; - match_SSA200(dp, path); - break; - } - } - - /* unlock controller map */ - unlock_map(); -} - -/* - * get unique drive ID - */ -static int -mhd_ident( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - int serial = (sp->sr_options & MHD_SERIAL); - struct uscsi_cmd ucmd; - union scsi_cdb cdb; - struct scsi_inquiry inq; - struct extvtoc vtoc_buf; - char path[MAXPATHLEN + 1]; - int len; - int err; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_fd >= 0); - assert(dp->dr_state & DRIVE_IDENTING); - - /* reset ID */ - (void) memset(&dp->dr_drive_id, 0, sizeof (dp->dr_drive_id)); - - /* get serial number */ - if (dp->dr_state & DRIVE_SERIALING) { - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - (void) memset(&ucmd, 0, sizeof (ucmd)); - (void) memset(&cdb, 0, sizeof (cdb)); - (void) memset(&inq, 0, sizeof (inq)); - cdb.scc_cmd = SCMD_INQUIRY; - cdb.g0_count0 = sizeof (inq); - ucmd.uscsi_cdb = (caddr_t)&cdb; - ucmd.uscsi_cdblen = CDB_GROUP0; - ucmd.uscsi_bufaddr = (caddr_t)&inq; - ucmd.uscsi_buflen = sizeof (inq); - ucmd.uscsi_flags = USCSI_READ | USCSI_ISOLATE | USCSI_DIAGNOSE; - ucmd.uscsi_timeout = 30; - err = ioctl(dp->dr_fd, USCSICMD, &ucmd); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err != 0) { - MHDPRINTF(( - "%s: USCSICMD(SCMD_INQUIRY): failed errno %d\n", - dp->dr_rname, errno)); - dp->dr_drive_id.did_flags &= ~MHD_DID_SERIAL; - } else { - char *p, *e; - uint_t i; - - MHDPRINTF(("%s: USCSICMD(SCMD_INQUIRY): success\n", - dp->dr_rname)); - dp->dr_drive_id.did_flags |= MHD_DID_SERIAL; - p = dp->dr_drive_id.did_serial; - e = p + sizeof (dp->dr_drive_id.did_serial); - for (i = 0; - ((i < sizeof (inq.inq_vid)) && (p < e)); ++i) - *p++ = inq.inq_vid[i]; - for (i = 0; - ((i < sizeof (inq.inq_pid)) && (p < e)); ++i) - *p++ = inq.inq_pid[i]; - for (i = 0; - ((i < sizeof (inq.inq_revision)) && (p < e)); ++i) - *p++ = inq.inq_revision[i]; - for (i = 0; - ((i < sizeof (inq.inq_serial)) && (p < e)); ++i) - *p++ = inq.inq_serial[i]; - assert(p == e); - for (p = dp->dr_drive_id.did_serial; (p < e); ++p) { - if (*p == '\0') - *p = ' '; - } - } - } else { - dp->dr_drive_id.did_flags &= ~MHD_DID_SERIAL; - } - - /* get VTOC */ - if (dp->dr_state & DRIVE_VTOCING) { - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - (void) memset(&vtoc_buf, 0, sizeof (vtoc_buf)); - err = read_extvtoc(dp->dr_fd, &vtoc_buf); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (err < 0) { - MHDPRINTF(("%s: read_extvtoc: failed errno %d\n", - dp->dr_rname, errno)); - dp->dr_drive_id.did_flags &= ~MHD_DID_TIME; - } else { - MHDPRINTF(("%s: read_extvtoc: success\n", - dp->dr_rname)); - dp->dr_drive_id.did_flags |= MHD_DID_TIME; - dp->dr_drive_id.did_time = vtoc_buf.timestamp[0]; - } - } else { - dp->dr_drive_id.did_flags &= ~MHD_DID_TIME; - } - - /* get controller info */ - if (dp->dr_state & DRIVE_CINFOING) { - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - len = readlink(dp->dr_rname0, path, (sizeof (path) - 1)); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (len >= sizeof (path)) { - len = -1; - errno = ENAMETOOLONG; - } - if (len < 0) { - MHDPRINTF(("%s: readlink: failed errno %d\n", - dp->dr_rname0, errno)); - dp->dr_drive_id.did_flags &= ~MHD_DID_CINFO; - } else { - MHDPRINTF(("%s: readlink: success\n", - dp->dr_rname0)); - dp->dr_drive_id.did_flags |= MHD_DID_CINFO; - (void) memset(&dp->dr_drive_id.did_cinfo, 0, - sizeof (dp->dr_drive_id.did_cinfo)); - match_SSA100(dp, path); - } - } else { - dp->dr_drive_id.did_flags &= ~MHD_DID_CINFO; - } - - /* return success */ - (void) mhd_state_clr(dp, DRIVE_IDENTING, NULL); - return (0); -} - -/* - * disk thread - */ -static void -mhd_drive_thread( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - - /* wait for dp->dr_thread to be filled in */ - assert(sp != NULL); - mhd_mx_lock(&sp->sr_mx); - - /* forever */ - for (;;) { - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_thread == thr_self()); - - /* check for changed set */ - if (sp != dp->dr_sp) { - MHDPRINTF2(("%s: changed from set '%s' to '%s'\n", - dp->dr_rname, sp->sr_name, dp->dr_sp->sr_name)); - - mhd_mx_unlock(&sp->sr_mx); - sp = dp->dr_sp; - mhd_mx_lock(&sp->sr_mx); - } - - /* open drive, if necessary */ - if ((dp->dr_fd < 0) && (! (DRIVE_IS_IDLE(dp) || - (dp->dr_state == DRIVE_IDLING)))) { - int serial = (sp->sr_options & MHD_SERIAL); - - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - dp->dr_fd = open(dp->dr_rname0, (O_RDWR|O_NDELAY), 0); - if (! serial) - mhd_mx_lock(&sp->sr_mx); - if (dp->dr_fd < 0) { - mhd_perror("%s: open", dp->dr_rname); - (void) mhd_state(dp, DRIVE_ERRORED, NULL); - dp->dr_errnum = errno; - } - continue; - } - - /* dispatch */ - switch (dp->dr_state) { - case DRIVE_IDLE: - MHDPRINTF1(("%s: IDLE\n", dp->dr_rname)); - break; - - case DRIVE_ERRORED: - MHDPRINTF1(("%s: ERRORED %d\n", - dp->dr_rname, dp->dr_errnum)); - break; - - case DRIVE_IDLING: - (void) mhd_state(dp, DRIVE_IDLE, NULL); - continue; - - case DRIVE_RESERVING: - MHDPRINTF1(("%s: RESERVING\n", dp->dr_rname)); - (void) mhd_reserve(dp); - assert(DRIVE_IS_IDLE(dp)); - continue; - - case DRIVE_FAILFASTING: - MHDPRINTF1(("%s: FAILFASTING\n", dp->dr_rname)); - (void) mhd_failfast(dp); - assert(DRIVE_IS_IDLE(dp)); - continue; - - case DRIVE_RELEASING: - MHDPRINTF1(("%s: RELEASING\n", dp->dr_rname)); - (void) mhd_release(dp); - assert(DRIVE_IS_IDLE(dp)); - continue; - - /* non-exclusive states */ - default: - assert(! (dp->dr_state & - (DRIVE_EXCLUSIVE_STATES & ~DRIVE_ERRORED))); - if (dp->dr_state & (DRIVE_PROBING | DRIVE_STATUSING)) { - MHDPRINTF1(("%s: PROBING\n", dp->dr_rname)); - (void) mhd_probe(dp); - assert(! (dp->dr_state & DRIVE_STATUSING)); - } - if (dp->dr_state & DRIVE_IDENTING) { - MHDPRINTF1(("%s: IDENTING\n", dp->dr_rname)); - (void) mhd_ident(dp); - assert(! (dp->dr_state & DRIVE_IDENTING)); - continue; /* in case we're probing */ - } - break; - } - - /* close drive, if possible */ - if ((dp->dr_fd >= 0) && (DRIVE_IS_IDLE(dp))) { - int serial = (sp->sr_options & MHD_SERIAL); - - if (! serial) - mhd_mx_unlock(&sp->sr_mx); - (void) close(dp->dr_fd); /* sd/ssd bug */ - if (! serial) - mhd_mx_lock(&sp->sr_mx); - dp->dr_fd = -1; - } - - /* wake up anybody waiting */ - mhd_cv_broadcast(&sp->sr_cv); - - /* see if anything happened */ - if (! DRIVE_IS_IDLE(dp)) - continue; - - /* wait for something to happen */ - if (! (dp->dr_state & DRIVE_PROBING)) { - mhd_cv_wait(&dp->dr_cv, &sp->sr_mx); - } else { - mhd_cv_timedwait(&dp->dr_cv, &sp->sr_mx, - (sp->sr_timeouts.mh_ff / 2)); - } - } -} - -/* - * kick off drive thread - */ -static int -mhd_thread_create( - mhd_drive_t *dp, - mhd_error_t *mhep -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - thread_t thread = NULL; - int rval = 0; - - /* check lock and thread */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(dp->dr_thread == NULL); - - /* create thread */ - if (thr_create(NULL, 0, (void *(*)(void *))mhd_drive_thread, - (void *)dp, (THR_DETACHED | THR_BOUND), &thread) != 0) { - rval = mhd_error(mhep, errno, "thr_create"); - } else { - assert(thread != NULL); - dp->dr_thread = thread; - } - - /* return success */ - return (rval); -} - -/* - * peel off s%u from name - */ -static char * -diskname( - const char *sname -) -{ - char *dname; - char *p, *e; - - /* duplicate name */ - if ((dname = Strdup(sname)) == NULL) - return (NULL); - - /* gobble number and 's' */ - p = e = dname + strlen(dname) - 1; - for (; (p > dname); --p) { - if (!isdigit(*p)) - break; - } - if ((p == e) || (p <= dname)) { - Free(dname); - return (NULL); - } - if (*p-- != 's') { - Free(dname); - return (NULL); - } - if ((p <= dname) || (!isdigit(*p))) { - Free(dname); - return (NULL); - } - *(++p) = '\0'; - return (dname); -} - -/* - * create new drive - */ -mhd_drive_t * -mhd_create_drive( - mhd_drive_set_t *sp, /* new set */ - char *rname, /* raw drive name */ - int *fdp, /* open device or -1 */ - mhd_error_t *mhep /* returned error */ -) -{ - mhd_drive_t *dp = NULL; - char *rname0 = NULL; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - - /* if drive already exists */ - if ((dp = mhd_find_drive(rname)) != NULL) { - mhd_drive_set_t *oldsp = dp->dr_sp; - - /* if set has changed, move drive */ - if (oldsp != sp) { - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_lock(&oldsp->sr_mx); - if (mhd_idle(dp, mhep) != 0) { - mhd_mx_unlock(&oldsp->sr_mx); - mhd_mx_lock(&sp->sr_mx); - return (NULL); - } - mhd_del_drive_from_set(dp); - mhd_mx_unlock(&oldsp->sr_mx); - mhd_mx_lock(&sp->sr_mx); - mhd_add_drive_to_set(sp, dp); - } - - /* return drive */ - return (dp); - } - - /* build slice0 */ - rname0 = Malloc(strlen(rname) + strlen("s0") + 1); - (void) strcpy(rname0, rname); - (void) strcat(rname0, "s0"); - - /* allocate and initialize drive */ - dp = Zalloc(sizeof (*dp)); - dp->dr_sp = sp; - dp->dr_rname = Strdup(rname); - dp->dr_rname0 = rname0; - mhd_cv_init(&dp->dr_cv); - dp->dr_thread = NULL; - dp->dr_fd = -1; - dp->dr_state = DRIVE_IDLE; - - /* steal open drive */ - if ((fdp != NULL) && (*fdp >= 0)) { - dp->dr_fd = *fdp; - *fdp = -1; - } - - /* add to set */ - mhd_add_drive_to_set(sp, dp); - - /* kick off drive thread */ - if (mhd_thread_create(dp, mhep) != 0) { - Free(dp->dr_rname0); - Free(dp->dr_rname); - Free(dp); - return (NULL); - } - - /* return drive */ - return (dp); -} - -/* - * find or create drive in any set - */ -static mhd_drive_t * -mhd_create_drive_anyset( - char *rname, - int *fdp, - mhd_error_t *mhep -) -{ - mhd_drive_set_t *null_sp = mhd_create_set(NULL, 0, NULL, NULL); - mhd_drive_t *dp; - - /* check locks */ - assert(null_sp != NULL); - - /* drive already exists */ - if ((dp = mhd_find_drive(rname)) != NULL) - return (dp); - - /* add to null set */ - mhd_mx_lock(&null_sp->sr_mx); - dp = mhd_create_drive(null_sp, rname, fdp, mhep); - mhd_mx_unlock(&null_sp->sr_mx); - - /* return drive */ - return (dp); -} - -/* - * process a file in the tree walk - */ -static int -do_disk( - const char *path, - const struct stat *statp, - int type -) -{ - char *dname = NULL; - int fd = -1; - struct dk_cinfo cinfo; - mhd_error_t status = mhd_null_error; - - /* skip all but character devices */ - if ((type != FTW_F) || (! S_ISCHR(statp->st_mode)) || - ((dname = diskname(path)) == NULL)) { - return (0); - } - - /* see if drive already exists */ - if (mhd_find_drive(dname) != NULL) - return (0); - - /* see if device is a disk */ - if ((fd = open(path, (O_RDONLY|O_NDELAY), 0)) < 0) - goto out; - if (ioctl(fd, DKIOCINFO, &cinfo) != 0) { - switch (errno) { - case EINVAL: - case ENOTTY: - break; - default: - mhd_perror("DKIOCINFO: %s", path); - break; - } - goto out; - } - - /* skip CDROMs */ - if (cinfo.dki_ctype == DKC_CDROM) { - (void) close(fd); - Free(dname); - return (0); - } - - /* put disk on list */ - if (mhd_create_drive_anyset(dname, &fd, &status) == NULL) { - mhde_perror(&status, ""); - goto out; - } - - /* cleanup, return success (no matter what) */ -out: - if (dname != NULL) - Free(dname); - if (fd >= 0) - (void) close(fd); - mhd_clrerror(&status); - return (0); -} - -/* - * find or create all the drives under a given directory - */ -int -mhd_create_drives( - char *path, - mhd_error_t *mhep -) -{ - /* default */ - if ((path == NULL) || (*path == '\0')) - path = "/dev/rdsk"; - - free_pln_cache(); - - /* walk the directory, adding disks */ - if (ftw(path, do_disk, 5) != 0) - return (mhd_error(mhep, errno, path)); - - /* return success */ - return (0); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_error.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_error.c deleted file mode 100644 index 5714cb36a1ca..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_error.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -#include - -/* - * debug stuff - */ -#ifdef MHD_DEBUG -int mhd_debug = MHD_DEBUG; -#endif - -/* - * free and clear error - */ -void -mhd_clrerror( - mhd_error_t *mhep -) -{ - if (mhep->name != NULL) - Free(mhep->name); - (void) memset(mhep, 0, sizeof (*mhep)); -} - -/* - * setup error - */ -int -mhd_error( - mhd_error_t *mhep, - int errnum, - char *name -) -{ - mhd_clrerror(mhep); - if (errnum != 0) { - mhep->errnum = errnum; - if (name != NULL) - mhep->name = Strdup(name); - return (-1); - } - return (0); -} - -/* - * mhd_error_t to string - */ -static char * -mhd_strerror( - mhd_error_t *mhep -) -{ - static char buf[1024]; - char *emsg; - - switch (mhep->errnum) { - case MHD_E_MAJORITY: - return ("could not get any reservations"); - case MHD_E_RESERVED: - return ("disk is reserved"); - default: - if ((emsg = strerror(mhep->errnum)) != NULL) - return (emsg); - (void) sprintf(buf, "errno %d out of range", errno); - return (buf); - } -} - -/* - * printf-like log - */ -static void -mhd_vprintf( - const char *fmt, - va_list ap -) -{ - if (isatty(fileno(stderr))) { - static mutex_t stderr_mx = DEFAULTMUTEX; - - mhd_mx_lock(&stderr_mx); - (void) vfprintf(stderr, fmt, ap); - (void) fflush(stderr); - (void) fsync(fileno(stderr)); - mhd_mx_unlock(&stderr_mx); - } - vsyslog(LOG_ERR, fmt, ap); -} - -/*PRINTFLIKE1*/ -void -mhd_eprintf( - const char *fmt, - ... -) -{ - va_list ap; - - va_start(ap, fmt); - mhd_vprintf(fmt, ap); - va_end(ap); -} - -/* - * printf-like perror() log - */ -/*PRINTFLIKE2*/ -static void -mhd_vperror( - mhd_error_t *mhep, - const char *fmt, - va_list ap -) -{ - char buf[1024]; - char *p = buf; - size_t len = sizeof (buf); - int n; - - if ((mhep->name != NULL) && (mhep->name[0] != '\0')) { - n = snprintf(p, len, "%s: ", mhep->name); - p += n; - len -= n; - } - if ((fmt != NULL) && (*fmt != '\0')) { - n = vsnprintf(p, len, fmt, ap); - p += n; - len -= n; - n = snprintf(p, len, ": "); - p += n; - len -= n; - } - (void) snprintf(p, len, "%s", mhd_strerror(mhep)); - mhd_eprintf("%s\n", buf); -} - -/*PRINTFLIKE2*/ -void -mhde_perror( - mhd_error_t *mhep, - const char *fmt, - ... -) -{ - va_list ap; - - va_start(ap, fmt); - mhd_vperror(mhep, fmt, ap); - va_end(ap); -} - -/*PRINTFLIKE1*/ -void -mhd_perror( - const char *fmt, - ... -) -{ - va_list ap; - mhd_error_t status = mhd_null_error; - - (void) mhd_error(&status, errno, NULL); - va_start(ap, fmt); - mhd_vperror(&status, fmt, ap); - va_end(ap); - mhd_clrerror(&status); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_failfast.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_failfast.c deleted file mode 100644 index 9bdc0ca1e010..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_failfast.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -#include -#include "ff.h" - -/* - * manipulate failfast driver - */ - -/* - * disarm failfast - */ -int -mhd_ff_disarm( - mhd_drive_set_t *sp, - mhd_error_t *mhep -) -{ - struct strioctl si; - - MHDPRINTF1(("%s: disarm\n", sp->sr_name)); - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - - /* ignore not open */ - if (sp->sr_ff < 0) - return (0); - - /* disarm any existing failfast */ - (void) memset(&si, 0, sizeof (si)); - si.ic_cmd = FAILFAST_DISARM; - si.ic_timout = INFTIM; - if (ioctl(sp->sr_ff, I_STR, &si) != 0) - return (mhd_error(mhep, errno, "/dev/ff")); - - /* return success */ - return (0); -} - -/* - * open failfast - */ -int -mhd_ff_open( - mhd_drive_set_t *sp, - mhd_error_t *mhep -) -{ - struct strioctl si; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert((sp->sr_ff_mode == MHD_FF_DEBUG) || - (sp->sr_ff_mode == MHD_FF_HALT) || - (sp->sr_ff_mode == MHD_FF_PANIC)); - - /* open if not already */ - if ((sp->sr_ff < 0) && - ((sp->sr_ff = open("/dev/ff", O_RDWR, 0)) < 0)) { - return (mhd_error(mhep, errno, "/dev/ff")); - } - - /* disarm any existing failfast */ - if (mhd_ff_disarm(sp, mhep) != 0) - return (-1); - - /* load setname */ - (void) memset(&si, 0, sizeof (si)); - si.ic_cmd = FAILFAST_SETNAME; - si.ic_timout = INFTIM; - si.ic_len = strlen(sp->sr_name); - si.ic_dp = sp->sr_name; - if (ioctl(sp->sr_ff, I_STR, &si) != 0) - return (mhd_error(mhep, errno, "/dev/ff")); - - /* load failfast mode */ - (void) memset(&si, 0, sizeof (si)); - switch (sp->sr_ff_mode) { - case MHD_FF_DEBUG: - si.ic_cmd = FAILFAST_DEBUG_MODE; - break; - case MHD_FF_HALT: - si.ic_cmd = FAILFAST_HALT_MODE; - break; - default: - assert(0); - /* FALLTHROUGH */ - case MHD_FF_PANIC: - si.ic_cmd = FAILFAST_PANIC_MODE; - break; - } - si.ic_timout = INFTIM; - if (ioctl(sp->sr_ff, I_STR, &si) != 0) - return (mhd_error(mhep, errno, "/dev/ff")); - - /* return success */ - return (0); -} - -/* - * close failfast - */ -int -mhd_ff_close( - mhd_drive_set_t *sp, - mhd_error_t *mhep -) -{ - int rval = 0; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - - /* ignore not open */ - if (sp->sr_ff < 0) - return (0); - - /* disarm any existing failfast */ - if (mhd_ff_disarm(sp, mhep) != 0) - rval = -1; - - /* close device */ - if (close(sp->sr_ff) != 0) - rval = mhd_error(mhep, errno, "/dev/ff"); - sp->sr_ff = -1; - - /* return success */ - return (rval); -} - -/* - * reset failfast - */ -int -mhd_ff_rearm( - mhd_drive_set_t *sp, - mhd_error_t *mhep -) -{ - uint_t ff = sp->sr_timeouts.mh_ff; - struct strioctl si; - - MHDPRINTF1(("%s: rearm\n", sp->sr_name)); - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(sp->sr_ff >= 0); - - /* if timeout is 0, disarm */ - if (ff == 0) - return (mhd_ff_disarm(sp, mhep)); - - /* rearm failfast */ - (void) memset(&si, 0, sizeof (si)); - si.ic_cmd = FAILFAST_ARM; - si.ic_timout = INFTIM; - si.ic_len = sizeof (ff); - si.ic_dp = (char *)&ff; - if (ioctl(sp->sr_ff, I_STR, &si) != 0) - return (mhd_error(mhep, errno, "/dev/ff")); - - /* return success */ - return (0); -} - -/* - * die right now - */ -void -mhd_ff_die( - mhd_drive_set_t *sp -) -{ - uint_t ff = 0; - struct strioctl si; - - MHDPRINTF(("%s: die\n", sp->sr_name)); - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(sp->sr_ff >= 0); - - /* rearm failfast for now */ - (void) memset(&si, 0, sizeof (si)); - si.ic_cmd = FAILFAST_ARM; - si.ic_timout = INFTIM; - si.ic_len = sizeof (ff); - si.ic_dp = (char *)&ff; - if (ioctl(sp->sr_ff, I_STR, &si) != 0) - mhd_perror("/dev/ff"); -} - -/* - * check set and reset failfast - */ -void -mhd_ff_check( - mhd_drive_set_t *sp -) -{ - mhd_drive_list_t *dlp = &sp->sr_drives; - mhd_msec_t ff = sp->sr_timeouts.mh_ff; - mhd_msec_t now = mhd_time(); - uint_t i, ok, cnt; - - /* check locks */ - assert(MUTEX_HELD(&sp->sr_mx)); - assert(sp->sr_ff >= 0); - assert((sp->sr_ff_mode == MHD_FF_DEBUG) || - (sp->sr_ff_mode == MHD_FF_HALT) || - (sp->sr_ff_mode == MHD_FF_PANIC)); - - /* see how many drives are within alloted time */ - for (ok = cnt = 0, i = 0; (i < dlp->dl_ndrive); ++i) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (dp->dr_state != DRIVE_PROBING) - continue; - ++cnt; - - MHDPRINTF2(("%s: now %llu dr_time %llu diff %llu ff %llu\n", - dp->dr_rname, now, dp->dr_time, (now - dp->dr_time), ff)); - if ((now - dp->dr_time) <= ff) - ++ok; - } - - /* check for majority */ - if ((cnt == 0) || (ok >= ((cnt / 2) + 1))) { - mhd_error_t status = mhd_null_error; - - if (mhd_ff_rearm(sp, &status) == 0) - return; - mhd_clrerror(&status); - } - - /* die */ - mhd_eprintf("%s: failed majority cnt %d ok %d\n", - sp->sr_name, cnt, ok); - mhd_ff_die(sp); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_freeresult.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_freeresult.c deleted file mode 100644 index 443fdb620e81..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_freeresult.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -/*ARGSUSED*/ -bool_t -metamhd_1_freeresult( - SVCXPRT *unused, - xdrproc_t xdr_result, - caddr_t result -) -{ - xdr_free(xdr_result, result); - return (TRUE); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_init.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_init.c deleted file mode 100644 index d0c47e0b4917..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_init.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "mhd_local.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern void nc_perror(const char *msg); - -/* daemon name */ -static char *myname = "rpc.metamhd"; - -/* - * reset and exit daemon - */ -void -mhd_exit( - int eval -) -{ - /* log exit */ - mhd_eprintf("exiting with %d\n", eval); - - /* exit with value */ - exit(eval); -} - -/* - * signal catchers - */ -static void -mhd_catcher( - int sig -) -{ - char buf[128]; - char *msg; - - /* log signal */ - if ((msg = strsignal(sig)) == NULL) { - (void) sprintf(buf, "unknown signal %d", sig); - msg = buf; - } - mhd_eprintf("%s\n", msg); - - /* let default handler do it's thing */ - (void) sigset(sig, SIG_DFL); - if (kill(getpid(), sig) != 0) { - mhd_perror("kill(getpid())"); - mhd_exit(-sig); - } -} - -/* - * initialize daemon - */ -static int -mhd_setup( - mhd_error_t *mhep -) -{ - struct rlimit rlimit; - pcinfo_t pcinfo; - pcparms_t pcparms; - rtparms_t *rtparmsp = (rtparms_t *)pcparms.pc_clparms; - - /* catch common signals */ - if ((sigset(SIGHUP, mhd_catcher) == SIG_ERR) || - (sigset(SIGINT, mhd_catcher) == SIG_ERR) || - (sigset(SIGABRT, mhd_catcher) == SIG_ERR) || - (sigset(SIGBUS, mhd_catcher) == SIG_ERR) || - (sigset(SIGSEGV, mhd_catcher) == SIG_ERR) || - (sigset(SIGPIPE, mhd_catcher) == SIG_ERR) || - (sigset(SIGTERM, mhd_catcher) == SIG_ERR)) { - return (mhd_error(mhep, errno, "sigset")); - } - - /* ignore SIGHUP (used in mhd_cv_timedwait) */ - if (sigset(SIGALRM, SIG_IGN) == SIG_ERR) { - return (mhd_error(mhep, errno, "sigset")); - } - - /* increase number of file descriptors */ - (void) memset(&rlimit, 0, sizeof (rlimit)); - if (getrlimit(RLIMIT_NOFILE, &rlimit) != 0) - return (mhd_error(mhep, errno, "getrlimit(RLIMIT_NOFILE)")); - rlimit.rlim_cur = rlimit.rlim_max = 1024; - if (setrlimit(RLIMIT_NOFILE, &rlimit) != 0) - return (mhd_error(mhep, errno, "setrlimit(RLIMIT_NOFILE)")); - (void) enable_extended_FILE_stdio(-1, -1); - - /* set default RT priority */ - (void) memset(&pcinfo, 0, sizeof (pcinfo)); - (void) strncpy(pcinfo.pc_clname, "RT", sizeof (pcinfo.pc_clname)); - if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) < 0) - return (mhd_error(mhep, errno, "priocntl(PC_GETCID): \"RT\"")); - (void) memset(&pcparms, 0, sizeof (pcparms)); - pcparms.pc_cid = pcinfo.pc_cid; - rtparmsp->rt_pri = RT_NOCHANGE; - rtparmsp->rt_tqsecs = (ulong_t)RT_NOCHANGE; - rtparmsp->rt_tqnsecs = RT_NOCHANGE; - if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) != 0) - return (mhd_error(mhep, errno, "priocntl(PC_SETPARMS)")); - - /* return success */ - return (0); -} - -/* - * (re)initalize daemon - */ -static int -mhd_init_daemon( - mhd_error_t *mhep -) -{ - static int already = 0; - - /* setup */ - if (! already) { - if (mhd_setup(mhep) != 0) - return (-1); - openlog(myname, LOG_CONS, LOG_DAEMON); - already = 1; - } - - /* return success */ - return (0); -} - -/* - * get my nodename - */ -static char * -mynodename() -{ - static struct utsname myuname; - static int done = 0; - - if (! done) { - if (uname(&myuname) == -1) { - mhd_perror("uname"); - assert(0); - } - done = 1; - } - return (myuname.nodename); -} - -/* - * check for trusted host and user - */ -static int -check_host( - struct svc_req *rqstp /* RPC stuff */ -) -{ - struct authsys_parms *sys_credp; - SVCXPRT *transp = rqstp->rq_xprt; - struct netconfig *nconfp = NULL; - struct nd_hostservlist *hservlistp = NULL; - int i; - int rval = -1; - - /* check for root */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - assert(sys_credp != NULL); - if (sys_credp->aup_uid != 0) - goto out; - - /* get hostnames */ - if (transp->xp_netid == NULL) { - mhd_eprintf("transp->xp_netid == NULL\n"); - goto out; - } - if ((nconfp = getnetconfigent(transp->xp_netid)) == NULL) { -#ifdef DEBUG - nc_perror("getnetconfigent(transp->xp_netid)"); -#endif - goto out; - } - if ((__netdir_getbyaddr_nosrv(nconfp, &hservlistp, &transp->xp_rtaddr) - != 0) || (hservlistp == NULL)) { -#ifdef DEBUG - netdir_perror("netdir_getbyaddr(transp->xp_rtaddr)"); -#endif - goto out; - } - - /* check hostnames */ - for (i = 0; (i < hservlistp->h_cnt); ++i) { - struct nd_hostserv *hservp = &hservlistp->h_hostservs[i]; - char *hostname = hservp->h_host; - - /* localhost is OK */ - if (strcmp(hostname, mynodename()) == 0) { - rval = 0; - goto out; - } - - /* check for remote root access */ - if (ruserok(hostname, 1, "root", "root") == 0) { - rval = 0; - goto out; - } - } - - /* cleanup, return success */ -out: - if (hservlistp != NULL) - netdir_free(hservlistp, ND_HOSTSERVLIST); - if (nconfp != NULL) - Free(nconfp); - return (rval); -} - -/* - * check for user in local group 14 - */ -static int -check_gid14( - uid_t uid -) -{ - struct passwd *pwp; - struct group *grp; - char **namep; - - /* get user info, check default GID */ - if ((pwp = getpwuid(uid)) == NULL) - return (-1); - if (pwp->pw_gid == METAMHD_GID) - return (0); - - /* check in group */ - if ((grp = getgrgid(METAMHD_GID)) == NULL) - return (-1); - for (namep = grp->gr_mem; ((*namep != NULL) && (**namep != '\0')); - ++namep) { - if (strcmp(*namep, pwp->pw_name) == 0) - return (0); - } - return (-1); -} - -/* - * check AUTH_SYS - */ -static int -check_sys( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - mhd_error_t *mhep /* returned status */ -) -{ - static mutex_t mx = DEFAULTMUTEX; - struct authsys_parms *sys_credp; - - /* for read, anything is OK */ - if (! (amode & W_OK)) - return (0); - - /* single thread (not really needed if daemon stays single threaded) */ - (void) mutex_lock(&mx); - - /* check for remote root or METAMHD_GID */ - /*LINTED*/ - sys_credp = (struct authsys_parms *)rqstp->rq_clntcred; - if ((check_gid14(sys_credp->aup_uid) == 0) || - (check_host(rqstp) == 0)) { - (void) mutex_unlock(&mx); - return (0); - } - - /* return failure */ - (void) mutex_unlock(&mx); - return (mhd_error(mhep, EACCES, myname)); -} - -/* - * setup RPC service - * - * if can't authenticate return < 0 - * if any other error return > 0 - */ -int -mhd_init( - struct svc_req *rqstp, /* RPC stuff */ - int amode, /* R_OK | W_OK */ - mhd_error_t *mhep /* returned status */ -) -{ - SVCXPRT *transp = rqstp->rq_xprt; - - /* - * initialize - */ - (void) memset(mhep, 0, sizeof (*mhep)); - - /* - * check credentials - */ - switch (rqstp->rq_cred.oa_flavor) { - - /* UNIX flavor */ - case AUTH_SYS: - { - if (check_sys(rqstp, amode, mhep) != 0) - return (1); /* error */ - break; - } - - /* can't authenticate anything else */ - default: - svcerr_weakauth(transp); - return (-1); /* weak authentication */ - - } - - /* - * (re)initialize - */ - if (mhd_init_daemon(mhep) != 0) - return (1); /* error */ - - /* return success */ - return (0); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_local.h b/usr/src/cmd/lvm/rpc.metamhd/mhd_local.h deleted file mode 100644 index 41cdd66a86e1..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_local.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _MHD_LOCAL_H -#define _MHD_LOCAL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * millisecond time - */ -typedef u_longlong_t mhd_msec_t; - -/* - * drive record - */ -typedef uint_t mhd_state_t; -#define DRIVE_IDLE 0x0000 /* exclusive state */ -#define DRIVE_ERRORED 0x0001 /* exclusive state */ -#define DRIVE_IDLING 0x0002 /* exclusive state */ -#define DRIVE_RESERVING 0x0004 /* exclusive state */ -#define DRIVE_FAILFASTING 0x0008 /* exclusive state */ -#define DRIVE_RELEASING 0x0010 /* exclusive state */ -#define DRIVE_EXCLUSIVE_STATES 0x00ff /* all exclusive states */ -#define DRIVE_PROBING 0x0100 -#define DRIVE_STATUSING 0x0200 -#define DRIVE_SERIALING 0x0400 -#define DRIVE_VTOCING 0x0800 -#define DRIVE_CINFOING 0x1000 -#define DRIVE_IDENTING (DRIVE_SERIALING | DRIVE_VTOCING | \ - DRIVE_CINFOING) -#define DRIVE_IS_IDLE(dp) (((dp)->dr_state == DRIVE_IDLE) || \ - ((dp)->dr_state == DRIVE_ERRORED)) -typedef struct mhd_drive { - struct mhd_drive_set *dr_sp; /* back pointer to set */ - char *dr_rname; /* raw device name */ - char *dr_rname0; /* slice 0 raw device name */ - cond_t dr_cv; /* synchronization */ - thread_t dr_thread; /* daemon thread */ - int dr_fd; /* open slice 0 */ - mhd_state_t dr_state; /* drive state */ - int dr_errnum; /* errno for DRIVE_ERRORED */ - mhd_msec_t dr_time; /* last successful probe time */ - mhd_drive_id_t dr_drive_id; /* unique drive identifier */ -} mhd_drive_t; - -/* - * drive list - */ -typedef struct mhd_drive_list { - mhd_drive_t **dl_drives; /* allocated list */ - size_t dl_alloc; /* amount allocated */ - size_t dl_ndrive; /* amount used */ -} mhd_drive_list_t; -#define MHD_NULL_LIST { NULL, 0, 0 } - -/* - * drive set - */ -typedef struct mhd_drive_set { - char *sr_name; /* set name */ - mutex_t sr_mx; /* set mutex */ - cond_t sr_cv; /* synchronization */ - mhd_opts_t sr_options; /* common options */ - mhd_mhiargs_t sr_timeouts; /* reservation timeouts */ - mhd_ff_mode_t sr_ff_mode; /* failfast mode */ - int sr_ff; /* failfast device descriptor */ - mhd_drive_list_t sr_drives; /* drives in set */ -} mhd_drive_set_t; - -/* - * debug stuff - */ -#define MHD_DEBUG 0 -#ifdef MHD_DEBUG -extern int mhd_debug; -#define MHDPRINTF(n) if (mhd_debug > 0) mhd_eprintf n -#define MHDPRINTF1(n) if (mhd_debug > 1) mhd_eprintf n -#define MHDPRINTF2(n) if (mhd_debug > 2) mhd_eprintf n -#else /* ! MHD_DEBUG */ -#define MHDPRINTF(n) -#define MHDPRINTF1(n) -#define MHDPRINTF2(n) -#endif /* ! MHD_DEBUG */ - -/* - * extern functions - */ -/* mhd_drive.c */ -extern const mhd_drive_list_t mhd_null_list; -extern void mhd_add_drive(mhd_drive_list_t *dlp, mhd_drive_t *dp); -extern void mhd_del_drive(mhd_drive_list_t *dlp, mhd_drive_t *dp); -extern void mhd_free_list(mhd_drive_list_t *dlp); -extern int mhd_state(mhd_drive_t *dp, mhd_state_t new_state, - mhd_error_t *mhep); -extern int mhd_state_set(mhd_drive_t *dp, mhd_state_t new_state, - mhd_error_t *mhep); -extern int mhd_idle(mhd_drive_t *dp, mhd_error_t *mhep); -extern mhd_drive_t *mhd_create_drive(mhd_drive_set_t *defaultsp, - char *rname, int *fdp, mhd_error_t *mhep); -extern int mhd_create_drives(char *path, mhd_error_t *mhep); - -/* mhd_error.c */ -extern void mhd_clrerror(mhd_error_t *mhep); -extern int mhd_error(mhd_error_t *mhep, int errnum, char *name); -/*PRINTFLIKE2*/ -extern void mhde_perror(mhd_error_t *mhep, const char *fmt, ...); -/*PRINTFLIKE1*/ -extern void mhd_perror(const char *fmt, ...); -/*PRINTFLIKE1*/ -extern void mhd_eprintf(const char *fmt, ...); - -/* mhd_failfast.c */ -extern int mhd_ff_disarm(mhd_drive_set_t *sp, mhd_error_t *mhep); -extern int mhd_ff_open(mhd_drive_set_t *sp, mhd_error_t *mhep); -extern int mhd_ff_close(mhd_drive_set_t *sp, mhd_error_t *mhep); -extern int mhd_ff_rearm(mhd_drive_set_t *sp, mhd_error_t *mhep); -extern void mhd_ff_die(mhd_drive_set_t *sp); -extern void mhd_ff_check(mhd_drive_set_t *sp); - -/* mhd_init.c */ -extern void mhd_exit(int eval); -extern int mhd_init(struct svc_req *rqstp, int amode, - mhd_error_t *mhep); - -/* mhd_ioctl.c */ -extern int tk_own(mhd_set_t *mhsp, mhd_error_t *mhep); -extern int rel_own(mhd_set_t *mhsp, mhd_error_t *mhep); -extern int get_status(mhd_status_args_t *argsp, - mhd_status_res_t *resp); - -/* mhd_mem.c */ -extern void *Malloc(size_t s); -extern void *Zalloc(size_t s); -extern void *Realloc(void *p, size_t s); -extern void *Calloc(size_t n, size_t s); -extern char *Strdup(const char *p); -extern void Free(void *p); - -/* mhd_set.c */ -extern void mhd_add_drive_to_set(mhd_drive_set_t *sp, - mhd_drive_t *dp); -extern void mhd_del_drive_from_set(mhd_drive_t *dp); -extern mhd_drive_set_t *mhd_create_set(mhd_set_t *mhsp, mhd_opts_t options, - mhd_drive_list_t *dlp, mhd_error_t *mhep); -extern mhd_drive_t *mhd_find_drive(char *rname); -extern int mhd_list_drives(char *path, mhd_did_flags_t flags, - mhd_list_res_t *resultsp, mhd_error_t *mhep); -extern int mhd_release_drives(mhd_set_t *mhsp, mhd_opts_t options, - mhd_error_t *mhep); -extern int mhd_reserve_drives(mhd_set_t *mhsp, - mhd_mhiargs_t *timeoutp, mhd_ff_mode_t ff_mode, - mhd_opts_t options, mhd_error_t *mhep); -extern int mhd_status_drives(mhd_set_t *mhsp, mhd_opts_t options, - mhd_drive_status_t **status, mhd_error_t *mhep); - -/* mhd_synch.c */ -extern void mhd_cv_init(cond_t *cvp); -extern void mhd_cv_destroy(cond_t *cvp); -extern void mhd_cv_wait(cond_t *cvp, mutex_t *mp); -extern void mhd_cv_timedwait(cond_t *cvp, mutex_t *mp, - mhd_msec_t to); -extern void mhd_cv_broadcast(cond_t *cvp); -extern void mhd_mx_init(mutex_t *mp); -extern void mhd_mx_destroy(mutex_t *mp); -extern void mhd_mx_lock(mutex_t *mp); -extern void mhd_mx_unlock(mutex_t *mp); -extern void mhd_rw_rdlock(rwlock_t *rwlp); -extern void mhd_rw_wrlock(rwlock_t *rwlp); -extern void mhd_rw_unlock(rwlock_t *rwlp); - -/* mhd_time.c */ -extern mhd_msec_t mhd_time(); - -#ifdef __cplusplus -} -#endif - -#endif /* _MHD_LOCAL_H */ diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_mem.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_mem.c deleted file mode 100644 index c7e6855a4adc..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_mem.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -void -Free( - void *p -) -{ - free(p); -} - -void * -Malloc( - size_t s -) -{ - void *mem; - - if ((mem = malloc(s)) == NULL) { - mhd_perror(""); - mhd_exit(1); - } - return (mem); -} - -void * -Zalloc( - size_t s -) -{ - return (memset(Malloc(s), 0, s)); -} - -void * -Realloc( - void *p, - size_t s -) -{ - if (p == NULL) - p = malloc(s); - else - p = realloc(p, s); - if (p == NULL) { - mhd_perror(""); - mhd_exit(1); - } - return (p); -} - -void * -Calloc( - size_t n, - size_t s -) -{ - unsigned long total; - - if (n == 0 || s == 0) { - total = 0; - } else { - total = (unsigned long)n * s; - /* check for overflow */ - if (total / n != s) - return (NULL); - } - return (Zalloc(total)); -} - -char * -Strdup( - const char *p -) -{ - char *n; - - if ((n = strdup(p)) == NULL) { - mhd_perror(""); - mhd_exit(1); - } - return (n); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_metamhd.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_metamhd.c deleted file mode 100644 index fd00a758bfe7..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_metamhd.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -/* - * list drives - */ -/*ARGSUSED*/ -bool_t -mhd_list_1_svc( - mhd_list_args_t *argp, - mhd_list_res_t *resp, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mhd_error_t *mhep = &resp->status; - int err; - - /* setup, check permissions */ - (void) memset(resp, 0, sizeof (*resp)); - if ((err = mhd_init(rqstp, R_OK, mhep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - (void) mhd_list_drives(argp->path, argp->flags, resp, mhep); - return (TRUE); -} - -/* - * take ownership of drives - */ -/*ARGSUSED*/ -bool_t -mhd_tkown_1_svc( - mhd_tkown_args_t *argp, - mhd_error_t *mhep, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int err; - - /* setup, check permissions */ - if ((err = mhd_init(rqstp, W_OK, mhep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - (void) mhd_reserve_drives(&argp->set, &argp->timeouts, argp->ff_mode, - argp->options, mhep); - return (TRUE); -} - -/* - * release ownership of drives - */ -/*ARGSUSED*/ -bool_t -mhd_relown_1_svc( - mhd_relown_args_t *argp, - mhd_error_t *mhep, - struct svc_req *rqstp /* RPC stuff */ -) -{ - int err; - - /* setup, check permissions */ - if ((err = mhd_init(rqstp, W_OK, mhep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - (void) mhd_release_drives(&argp->set, argp->options, mhep); - return (TRUE); -} - -/* - * status drives - */ -/*ARGSUSED*/ -bool_t -mhd_status_1_svc( - mhd_status_args_t *argp, - mhd_status_res_t *resp, - struct svc_req *rqstp /* RPC stuff */ -) -{ - mhd_error_t *mhep = &resp->status; - mhd_drive_status_t *status = NULL; - int cnt; - int err; - - /* setup, check permissions */ - (void) memset(resp, 0, sizeof (*resp)); - if ((err = mhd_init(rqstp, W_OK, mhep)) < 0) - return (FALSE); - else if (err != 0) - return (TRUE); - - /* doit */ - if ((cnt = mhd_status_drives(&argp->set, argp->options, - &status, mhep)) < 0) { - return (TRUE); - } - resp->results.results_len = cnt; - resp->results.results_val = status; - return (TRUE); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_set.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_set.c deleted file mode 100644 index b949051612ee..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_set.c +++ /dev/null @@ -1,815 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -/* - * manipulate set list - */ - -/* - * global set list - */ -static mutex_t mhd_set_mx = DEFAULTMUTEX; -static uint_t mhd_nset = 0; -static mhd_drive_set_t **mhd_sets = NULL; - -/* - * add drive to set - */ -void -mhd_add_drive_to_set( - mhd_drive_set_t *sp, - mhd_drive_t *dp -) -{ - mhd_drive_list_t *dlp = &sp->sr_drives; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - assert(DRIVE_IS_IDLE(dp)); - - /* add to set */ - mhd_add_drive(dlp, dp); - - /* adjust backlink */ - dp->dr_sp = sp; -} - -/* - * delete drive from set - */ -void -mhd_del_drive_from_set( - mhd_drive_t *dp -) -{ - mhd_drive_set_t *sp = dp->dr_sp; - mhd_drive_list_t *dlp = &sp->sr_drives; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - assert(DRIVE_IS_IDLE(dp)); - - /* delete from set */ - mhd_del_drive(dlp, dp); - - /* adjust backlink */ - dp->dr_sp = NULL; -} - -/* - * find set in list - */ -static mhd_drive_set_t * -mhd_find_set( - char *setname -) -{ - uint_t i; - - /* check lock */ - assert(MUTEX_HELD(&mhd_set_mx)); - - /* look for set */ - for (i = 0; (i < mhd_nset); ++i) { - mhd_drive_set_t *sp = mhd_sets[i]; - - if (strcmp(setname, sp->sr_name) == 0) - return (sp); - } - - /* not found */ - return (NULL); -} - -/* - * wait for operation to complete - */ -static void -mhd_wait_set( - mhd_drive_set_t *sp, - mhd_drive_list_t *dlp, - mhd_state_t state -) -{ - /* check lock */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - - /* wait for complete */ - for (;;) { - uint_t cnt = 0; - uint_t i; - - /* kick threads */ - for (i = 0; (i < dlp->dl_ndrive); ++i) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - /* IDLE or ERRORED */ - if (state == DRIVE_IDLE) { - if (DRIVE_IS_IDLE(dp)) - continue; - } - - /* operation complete */ - else { - if (! (dp->dr_state & state)) - continue; - } - - /* kick thread */ - mhd_cv_broadcast(&dp->dr_cv); - ++cnt; - } - - /* if complete, quit */ - if (cnt == 0) - break; - - /* wait for something to happen */ - (void) mhd_cv_wait(&sp->sr_cv, &sp->sr_mx); - } -} - -/* - * idle set - */ -static int -mhd_idle_set( - mhd_drive_set_t *sp, - mhd_drive_list_t *dlp, - mhd_error_t *mhep -) -{ - uint_t i; - - /* check lock */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - - /* disarm any failfast */ - if (dlp->dl_ndrive >= sp->sr_drives.dl_ndrive) { - if (mhd_ff_disarm(sp, mhep) != 0) - return (-1); - } - - /* set IDLING */ - for (i = 0; (i < dlp->dl_ndrive); ++i) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (! DRIVE_IS_IDLE(dp)) { - if (mhd_state(dp, DRIVE_IDLING, mhep) != 0) - return (-1); - } - } - - /* wait for IDLE */ - mhd_wait_set(sp, dlp, DRIVE_IDLE); - - /* return success */ - return (0); -} - -/* - * create or update new set - */ -mhd_drive_set_t * -mhd_create_set( - mhd_set_t *mhsp, - mhd_opts_t options, - mhd_drive_list_t *dlp, - mhd_error_t *mhep -) -{ - char *setname; - mhd_drive_set_t *sp; - mhd_drive_list_t *sp_dlp; - mhd_drive_set_t *null_sp; - uint_t i; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - - /* get setname */ - if (mhsp == NULL) - setname = ""; - else - setname = mhsp->setname; - - /* find or create set */ - if ((sp = mhd_find_set(setname)) == NULL) { - /* allocate and initialize set */ - sp = Zalloc(sizeof (*sp)); - sp->sr_name = Strdup(setname); - mhd_mx_init(&sp->sr_mx); - mhd_cv_init(&sp->sr_cv); - sp->sr_ff = -1; - - /* append to set list */ - ++mhd_nset; - mhd_sets = Realloc(mhd_sets, (mhd_nset * sizeof (*mhd_sets))); - mhd_sets[mhd_nset - 1] = sp; - } - sp_dlp = &sp->sr_drives; - - /* if just grabbing null set, return */ - if (mhsp == NULL) - return (sp); - assert(strcmp(setname, "") != 0); - assert(mhep != NULL); - - /* get null set */ - null_sp = mhd_create_set(NULL, 0, NULL, NULL); - assert(null_sp != NULL); - assert(sp != null_sp); - - /* grab set lock */ - mhd_mx_lock(&sp->sr_mx); - - /* save options */ - if (options & MHD_SERIAL) - sp->sr_options |= MHD_SERIAL; - else - sp->sr_options &= ~MHD_SERIAL; - - /* move drives no longer in set to null set */ - if (! (options & MHD_PARTIAL_SET)) { - for (i = 0; (i < sp_dlp->dl_ndrive); /* void */) { - mhd_drive_t *dp = sp_dlp->dl_drives[i]; - uint_t j; - - /* check still there */ - for (j = 0; (j < mhsp->drives.drives_len); ++j) { - mhd_drivename_t mhdp; - - mhdp = mhsp->drives.drives_val[j]; - if (strcmp(dp->dr_rname, mhdp) == 0) - break; - } - if (j < mhsp->drives.drives_len) { - ++i; - continue; - } - - /* idle the drive */ - if (mhd_idle(dp, mhep) != 0) - mhd_clrerror(mhep); - - /* move to null set */ - mhd_del_drive_from_set(dp); - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_lock(&null_sp->sr_mx); - mhd_add_drive_to_set(null_sp, dp); - mhd_mx_unlock(&null_sp->sr_mx); - mhd_mx_lock(&sp->sr_mx); - } - } - - /* add new drives to lists */ - for (i = 0; (i < mhsp->drives.drives_len); ++i) { - mhd_drivename_t mhdp = mhsp->drives.drives_val[i]; - uint_t j; - mhd_drive_t *dp; - - /* check already there */ - for (j = 0; (j < dlp->dl_ndrive); ++j) { - dp = dlp->dl_drives[j]; - if (strcmp(mhdp, dp->dr_rname) == 0) - break; - } - if (j < dlp->dl_ndrive) { - mhd_add_drive(dlp, dp); - continue; - } - - /* add drive to set */ - if ((dp = mhd_create_drive(sp, mhdp, NULL, mhep)) == NULL) { - mhde_perror(mhep, "mhd_create_drive: %s", mhdp); - continue; - } - mhd_add_drive(dlp, dp); - } - - /* debug */ -#ifdef MHD_DEBUG - if (mhd_debug > 0) { - for (i = 0; (i < mhd_nset); ++i) { - mhd_drive_set_t *sp = mhd_sets[i]; - mhd_drive_list_t *dlp = &sp->sr_drives; - char buf[10240]; - uint_t j; - - (void) snprintf(buf, sizeof (buf), "set '%s':", - sp->sr_name); - for (j = 0; (j < dlp->dl_ndrive); ++j) { - mhd_drive_t *dp = dlp->dl_drives[j]; - char *p; - - if ((p = strrchr(dp->dr_rname, '/')) != NULL) - ++p; - else - p = dp->dr_rname; - (void) strncat(buf, " ", sizeof (buf)); - (void) strncat(buf, p, sizeof (buf)); - } - buf[sizeof (buf) - 1] = '\0'; - mhd_eprintf("%s\n", buf); - } - } -#endif /* MHD_DEBUG */ - - /* unlock, return set */ - mhd_mx_unlock(&sp->sr_mx); - return (sp); -} - -/* - * find drive - */ -mhd_drive_t * -mhd_find_drive( - char *rname -) -{ - uint_t i; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - - /* for each set */ - for (i = 0; (i < mhd_nset); ++i) { - mhd_drive_set_t *sp = mhd_sets[i]; - mhd_drive_list_t *dlp = &sp->sr_drives; - uint_t j; - - /* for each drive */ - for (j = 0; (j < dlp->dl_ndrive); ++j) { - mhd_drive_t *dp = dlp->dl_drives[j]; - - if (strcmp(rname, dp->dr_rname) == 0) - return (dp); - } - } - - /* not found */ - return (NULL); -} - -/* - * list all the drives - */ -int -mhd_list_drives( - char *path, - mhd_did_flags_t flags, - mhd_list_res_t *resultsp, - mhd_error_t *mhep -) -{ - mhd_state_t state; - uint_t ndrive, i, j, c; - - /* grab lock */ - mhd_mx_lock(&mhd_set_mx); - - /* add path to list */ - if (mhd_create_drives(path, mhep) != 0) { - mhd_mx_unlock(&mhd_set_mx); - return (-1); - } - - /* get what we want */ - state = 0; - if (flags & MHD_DID_SERIAL) - state |= DRIVE_SERIALING; - if (flags & MHD_DID_TIME) - state |= DRIVE_VTOCING; - if (flags & MHD_DID_CINFO) - state |= DRIVE_CINFOING; - - /* ident and count drives */ - for (ndrive = 0, i = 0; (i < mhd_nset); ++i) { - mhd_drive_set_t *sp = mhd_sets[i]; - mhd_drive_list_t *dlp = &sp->sr_drives; - - /* count drives */ - ndrive += dlp->dl_ndrive; - - /* ident drives */ - if (state != 0) { - mhd_mx_lock(&sp->sr_mx); - for (j = 0; (j < dlp->dl_ndrive); ++j) { - mhd_drive_t *dp = dlp->dl_drives[j]; - - if (mhd_state_set(dp, state, mhep) != 0) { - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_unlock(&mhd_set_mx); - return (-1); - } - } - mhd_wait_set(sp, dlp, state); - mhd_mx_unlock(&sp->sr_mx); - } - } - - /* build list */ - assert(resultsp->results.mhd_drive_info_list_t_len == 0); - assert(resultsp->results.mhd_drive_info_list_t_val == NULL); - resultsp->results.mhd_drive_info_list_t_len = ndrive; - resultsp->results.mhd_drive_info_list_t_val = Zalloc( - ndrive * sizeof (*resultsp->results.mhd_drive_info_list_t_val)); - for (c = 0, i = 0; (i < mhd_nset); ++i) { - mhd_drive_set_t *sp = mhd_sets[i]; - mhd_drive_list_t *dlp = &sp->sr_drives; - - mhd_mx_lock(&sp->sr_mx); - for (j = 0; (j < dlp->dl_ndrive); ++j) { - mhd_drive_t *dp = dlp->dl_drives[j]; - mhd_drive_info_t *ip = - &resultsp->results.mhd_drive_info_list_t_val[c++]; - - ip->dif_name = Strdup(dp->dr_rname); - ip->dif_id = dp->dr_drive_id; - } - mhd_mx_unlock(&sp->sr_mx); - } - assert(c == ndrive); - - /* unlock, return count */ - mhd_mx_unlock(&mhd_set_mx); - return (ndrive); -} - -/* - * release drives - */ -static int -mhd_release_set( - mhd_drive_set_t *sp, - mhd_drive_list_t *dlp, - mhd_error_t *mhep -) -{ - uint_t i; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - - /* idle set */ - if (mhd_idle_set(sp, dlp, mhep) != 0) - return (-1); - - /* release drives */ - for (i = 0; (i < dlp->dl_ndrive); i++) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (mhd_state(dp, DRIVE_RELEASING, mhep) != 0) - return (-1); - } - mhd_wait_set(sp, dlp, DRIVE_IDLE); - - /* return success */ - return (0); -} - -/* - * release drives in set - */ -int -mhd_release_drives( - mhd_set_t *mhsp, - mhd_opts_t options, - mhd_error_t *mhep -) -{ - mhd_drive_list_t dl = mhd_null_list; - mhd_drive_set_t *sp; - int rval; - - /* grab global lock */ - mhd_mx_lock(&mhd_set_mx); - - /* create or update set */ - if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (-1); - } - - /* lock set */ - mhd_mx_lock(&sp->sr_mx); - - /* release drives */ - rval = mhd_release_set(sp, &dl, mhep); - - /* unlock, return success */ -out: - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (rval); -} - -/* - * reserve drives - */ -static int -mhd_reserve_set( - mhd_drive_set_t *sp, - mhd_drive_list_t *dlp, - mhd_error_t *mhep -) -{ - mhd_msec_t ff = sp->sr_timeouts.mh_ff; - uint_t retry, i, ok; - int rval = 0; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - - /* idle set, idle everyone if cancelling failfast */ - if (ff == 0) { - if (mhd_idle_set(sp, &sp->sr_drives, mhep) != 0) - return (-1); - } else { - if (mhd_idle_set(sp, dlp, mhep) != 0) - return (-1); - } - - /* - * Try to take ownership of the drives twice. This helps - * to avoid the situation where the other machine retakes - * ownership of a majority drives back, but then kills itself - * leaving no owners. - */ - for (retry = 0; (retry < 2); ++retry) { - for (i = 0; (i < dlp->dl_ndrive); i++) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if ((retry == 0) || - ((dp->dr_state == DRIVE_ERRORED) && - (dp->dr_errnum == EACCES))) { - if (mhd_state(dp, DRIVE_RESERVING, mhep) != 0) - return (-1); - } - } - mhd_wait_set(sp, dlp, DRIVE_IDLE); - } - - /* - * Did the take ownership succeed on a majority of the drives? - */ - ok = 0; - for (i = 0; (i < dlp->dl_ndrive); ++i) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (dp->dr_state == DRIVE_IDLE) - ++ok; - } - - /* - * Let the replica majority be the deciding factor, if able to get - * at least a single drive reserved. - */ - if (ok == 0) { - rval = mhd_error(mhep, MHD_E_MAJORITY, sp->sr_name); - goto out; - } - - /* - * Enable the failfast probes if we haven't given up yet. - */ - switch (sp->sr_ff_mode) { - - /* do nothing */ - default: - assert(0); - /* FALLTHROUGH */ - case MHD_FF_NONE: - goto out; - - /* old style per drive failfast */ - case MHD_FF_DRIVER: - for (i = 0; (i < dlp->dl_ndrive); i++) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (dp->dr_state != DRIVE_ERRORED) { - if (mhd_state(dp, DRIVE_FAILFASTING, - mhep) != 0) { - rval = -1; - goto out; - } - } - } - mhd_wait_set(sp, dlp, DRIVE_IDLE); - break; - - /* failfast probe threads */ - case MHD_FF_DEBUG: - case MHD_FF_HALT: - case MHD_FF_PANIC: - if (ff != 0) { - if (mhd_ff_open(sp, mhep) != 0) { - rval = -1; - goto out; - } - for (i = 0; (i < dlp->dl_ndrive); i++) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (mhd_state_set(dp, DRIVE_PROBING, - mhep) != 0) { - rval = -1; - goto out; - } - dp->dr_time = mhd_time(); - } - (void) mhd_ff_rearm(sp, mhep); - } - break; - } - - /* cleanup, return success */ -out: - if (rval != 0) { - mhd_error_t status = mhd_null_error; - - (void) mhd_release_set(sp, dlp, &status); - mhd_clrerror(&status); - } - return (rval); -} - -/* - * reserve drives in set - */ -int -mhd_reserve_drives( - mhd_set_t *mhsp, - mhd_mhiargs_t *timeoutp, - mhd_ff_mode_t ff_mode, - mhd_opts_t options, - mhd_error_t *mhep -) -{ - mhd_drive_list_t dl = mhd_null_list; - mhd_drive_set_t *sp; - int rval; - - /* grab global lock */ - mhd_mx_lock(&mhd_set_mx); - - /* create or update set */ - if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (-1); - } - - /* lock set */ - mhd_mx_lock(&sp->sr_mx); - - /* can't change mode or timeouts of partial set */ - if ((dl.dl_ndrive != sp->sr_drives.dl_ndrive) && - (options & MHD_PARTIAL_SET)) { - if (ff_mode != sp->sr_ff_mode) { - mhd_eprintf("%s: invalid ff_mode %d now %d\n", - sp->sr_name, ff_mode, sp->sr_ff_mode); - ff_mode = sp->sr_ff_mode; - } - if (timeoutp->mh_ff < sp->sr_timeouts.mh_ff) { - mhd_eprintf("%s: invalid mh_ff %d now %d\n", - sp->sr_name, timeoutp->mh_ff, - sp->sr_timeouts.mh_ff); - timeoutp->mh_ff = sp->sr_timeouts.mh_ff; - } - } - - /* save timouts and mode */ - sp->sr_timeouts = *timeoutp; - sp->sr_ff_mode = ff_mode; - - /* reserve drives */ - rval = mhd_reserve_set(sp, &dl, mhep); - - /* unlock, return success */ -out: - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (rval); -} - -/* - * status drives - */ -static int -mhd_status_set( - mhd_drive_set_t *sp, - mhd_drive_list_t *dlp, - mhd_error_t *mhep -) -{ - uint_t i; - - /* check locks */ - assert(MUTEX_HELD(&mhd_set_mx)); - assert(MUTEX_HELD(&sp->sr_mx)); - - /* status drives */ - for (i = 0; (i < dlp->dl_ndrive); i++) { - mhd_drive_t *dp = dlp->dl_drives[i]; - - if (mhd_state_set(dp, DRIVE_STATUSING, mhep) != 0) - return (-1); - } - mhd_wait_set(sp, dlp, DRIVE_STATUSING); - - /* return success */ - return (0); -} - -/* - * status drives in set - */ -int -mhd_status_drives( - mhd_set_t *mhsp, - mhd_opts_t options, - mhd_drive_status_t **status, - mhd_error_t *mhep -) -{ - mhd_drive_list_t dl = mhd_null_list; - mhd_drive_list_t *dlp = &dl; - mhd_drive_set_t *sp; - uint_t i; - int rval = 0; - - /* grab global lock */ - mhd_mx_lock(&mhd_set_mx); - - /* create or update set */ - if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (-1); - } - - /* lock set */ - mhd_mx_lock(&sp->sr_mx); - - /* status drives */ - if (mhd_status_set(sp, &dl, mhep) != 0) { - rval = -1; - goto out; - } - - /* build list */ - *status = Zalloc(dlp->dl_ndrive * sizeof (**status)); - for (i = 0; (i < dlp->dl_ndrive); ++i) { - mhd_drive_t *dp = dlp->dl_drives[i]; - mhd_drive_status_t *statusp = &(*status)[i]; - - statusp->drive = Strdup(dp->dr_rname); - statusp->errnum = dp->dr_errnum; - } - assert(i == dlp->dl_ndrive); - rval = dlp->dl_ndrive; - - /* unlock, return count */ -out: - mhd_mx_unlock(&sp->sr_mx); - mhd_mx_unlock(&mhd_set_mx); - mhd_free_list(&dl); - return (rval); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_synch.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_synch.c deleted file mode 100644 index 28072c2250d2..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_synch.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -/* - * manipulate conditional variables, handle errors - */ -void -mhd_cv_init( - cond_t *cvp -) -{ - if (cond_init(cvp, USYNC_THREAD, NULL) != 0) { - mhd_perror("cond_init"); - mhd_exit(1); - } -} - -void -mhd_cv_destroy( - cond_t *cvp -) -{ - if (cond_destroy(cvp) != 0) { - mhd_perror("cond_destroy"); - mhd_exit(1); - } -} - -void -mhd_cv_wait( - cond_t *cvp, - mutex_t *mp -) -{ - int err; - - assert(MUTEX_HELD(mp)); - if (((err = cond_wait(cvp, mp)) != 0) && - (err != EINTR)) { - errno = err; - mhd_perror("cond_wait"); - mhd_exit(1); - } -} - -void -mhd_cv_timedwait( - cond_t *cvp, - mutex_t *mp, - mhd_msec_t to -) -{ - struct itimerval new, old; - int err; - - /* check lock */ - assert(MUTEX_HELD(mp)); - assert(to != 0); - - /* set timer */ - new.it_interval.tv_sec = 0; - new.it_interval.tv_usec = 0; - new.it_value.tv_sec = to / 1000; - new.it_value.tv_usec = (to % 1000) * 1000; - if (setitimer(ITIMER_REAL, &new, &old) != 0) { - mhd_perror("cond_wait"); - mhd_exit(1); - } - - /* wait for condition or timeout */ - if (((err = cond_wait(cvp, mp)) != 0) && - (err != EINTR)) { - errno = err; - mhd_perror("cond_wait"); - mhd_exit(1); - } - - /* reset timer */ - if (err != EINTR) { - new.it_interval.tv_sec = 0; - new.it_interval.tv_usec = 0; - new.it_value.tv_sec = 0; - new.it_value.tv_usec = 0; - if (setitimer(ITIMER_REAL, &new, &old) != 0) { - mhd_perror("cond_wait"); - mhd_exit(1); - } - } -} - -void -mhd_cv_broadcast( - cond_t *cvp -) -{ - if (cond_broadcast(cvp) != 0) { - mhd_perror("cond_broadcast"); - mhd_exit(1); - } -} - -/* - * manipulate mutexs, handle errors - */ -void -mhd_mx_init( - mutex_t *mp -) -{ - if (mutex_init(mp, USYNC_THREAD, NULL) != 0) { - mhd_perror("mutex_init"); - mhd_exit(1); - } -} - -void -mhd_mx_destroy( - mutex_t *mp -) -{ - if (mutex_destroy(mp) != 0) { - mhd_perror("mutex_destroy"); - mhd_exit(1); - } -} - -void -mhd_mx_lock( - mutex_t *mp -) -{ - if (mutex_lock(mp) != 0) { - mhd_perror("mutex_lock"); - mhd_exit(1); - } -} - -void -mhd_mx_unlock( - mutex_t *mp -) -{ - assert(MUTEX_HELD(mp)); - if (mutex_unlock(mp) != 0) { - mhd_perror("mutex_unlock"); - mhd_exit(1); - } -} - -/* - * manipulate rwlockss, handle errors - */ -void -mhd_rw_rdlock( - rwlock_t *rwlp -) -{ - if (rw_rdlock(rwlp) != 0) { - mhd_perror("rw_rdlock"); - mhd_exit(1); - } -} - -void -mhd_rw_wrlock( - rwlock_t *rwlp -) -{ - if (rw_wrlock(rwlp) != 0) { - mhd_perror("rw_wrlock"); - mhd_exit(1); - } -} - -void -mhd_rw_unlock( - rwlock_t *rwlp -) -{ - if (rw_unlock(rwlp) != 0) { - mhd_perror("rw_unlock"); - mhd_exit(1); - } -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/mhd_time.c b/usr/src/cmd/lvm/rpc.metamhd/mhd_time.c deleted file mode 100644 index 6eb490008745..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/mhd_time.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2000 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mhd_local.h" - -/* - * manipulate times - */ - -/* - * get current realtime - */ -mhd_msec_t -mhd_time() -{ - return (gethrtime() / (1000 * 1000)); -} diff --git a/usr/src/cmd/lvm/rpc.metamhd/sparc/Makefile b/usr/src/cmd/lvm/rpc.metamhd/sparc/Makefile deleted file mode 100644 index 1eec3121798f..000000000000 --- a/usr/src/cmd/lvm/rpc.metamhd/sparc/Makefile +++ /dev/null @@ -1,128 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996, 2001-2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright 2015 Igor Kozhukhov -# -# -# Makefile for logical volume management -# - -PROG= rpc.metamhd - -RPCMOD = metamhd - -DERIVED_FILES = \ - $(RPCMOD)_svc.c \ - $(RPCMOD)_xdr.c \ - mhdx_xdr.c - -OBJECTS= \ - mhd_drive.o \ - mhd_error.o \ - mhd_failfast.o \ - mhd_freeresult.o \ - mhd_init.o \ - mhd_mem.o \ - mhd_metamhd.o \ - mhd_set.o \ - mhd_synch.o \ - mhd_time.o - -LINTOBJECTS= \ - mhd_drive.o \ - mhd_error.o \ - mhd_failfast.o \ - mhd_freeresult.o \ - mhd_init.o \ - mhd_mem.o \ - mhd_metamhd.o \ - mhd_set.o \ - mhd_synch.o \ - mhd_time.o - -OBJECTS += $(DERIVED_FILES:.c=.o) - -SRCS = $(OBJECTS:%.o=../%.c) -LINTSRCS = $(LINTOBJECTS:%.o=../%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - - -MDLIBS = -LDLIBS += -ladm -lsocket -lnsl - -POFILES= $(OBJECTS:%.o=%.po) - - -$(RPCMOD)_svc.c := RPCGENFLAGS += -A -K -1 - -DEFINES += -D_REENTRANT -CFLAGS += $(DEFINES) - - -# -# -lint := LINTFLAGS += -m - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - -all: $(PROG) - -$(PROG): $(OBJECTS) - $(LINK.c) -o $@ $(OBJECTS) $(LDLIBS) - $(POST_PROCESS) - - -ROOTUSRSBINPROG=$(PROG:%=$(ROOTUSRSBIN)/%) -install: all $(ROOTUSRSBINPROG) - -catalog: - -cstyle: - ${CSTYLE} ${SRCS} - -lint: - ${LINT.c} $(LINTFLAGS) ${LINTSRCS} - -clean: - ${RM} ${OBJECTS} ${DERIVED_FILES} *.o - -clobber: clean - $(RM) $(PROG) $(CLOBBERFILES) - -metamhd_svc.c: $(SRC)/head/metamhd.x - $(CP) $(SRC)/head/metamhd.x . - $(RPCGEN) $(RPCGENFLAGS_SERVER) metamhd.x -o $@ - ${RM} metamhd.x - -metamhd_xdr.c: $(SRC)/head/metamhd.x - $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metamhd.x -o $@ - -mhdx_xdr.c: $(SRC)/uts/common/sys/lvm/mhdx.x - $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/mhdx.x | \ - $(AWK) '{sub(/uts\/common\/sys\/lvm/, "head") ; print $$0}' >$@ diff --git a/usr/src/cmd/lvm/util/Makefile b/usr/src/cmd/lvm/util/Makefile deleted file mode 100644 index 5e55fb0f9235..000000000000 --- a/usr/src/cmd/lvm/util/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Makefile for logical volume management -# -# -# cmd/lvm/util/Makefile - -include ../../Makefile.cmd - -PROGS= \ - medstat \ - metaclear \ - metadb \ - metadetach \ - metadevadm \ - metahs \ - metainit \ - metaoffline \ - metaonline \ - metaparam \ - metarecover \ - metarename \ - metareplace \ - metaroot \ - metaset \ - metastat \ - metasync \ - metattach \ - metaimport \ - metaclust - -MANIFEST= metainit.xml \ - metasync.xml - -SVCMETHOD= svc-metainit \ - svc-metasync - -ROOTMANIFESTDIR= $(ROOTSVCSYSTEM) - -SRCS = $(PROGS:%=%.c) - -include ../Makefile.lvm - -SUBDIRS= $(MACH) - -metainit.i := CPPFLAGS += -I$(SRC)/lib/lvm/libmeta/common/hdrs - -MSGFILES= $(SRCS:%.c=%.i) -POFILE= utilp.po - -all := TARGET = all -install := TARGET = install -clean := TARGET = clean -clobber := TARGET = clobber -lint := TARGET = lint - -.KEEP_STATE: - -all: $(SCRIPTS) $(SUBDIRS) - -catalog: $(POFILE) - -$(POFILE): $(MSGFILES) - $(BUILDPO.msgfiles) - -lint: $(SUBDIRS) - -check: $(CHKMANIFEST) - -clobber: $(SUBDIRS) - -clean: $(SUBDIRS) - $(RM) $(MSGFILES) - -install: $(ROOTSVCMETHOD) $(ROOTMANIFEST) $(SUBDIRS) - -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: - -include ../../Makefile.targ -include $(SRC)/Makefile.msg.targ diff --git a/usr/src/cmd/lvm/util/Makefile.com b/usr/src/cmd/lvm/util/Makefile.com deleted file mode 100644 index 97026216288d..000000000000 --- a/usr/src/cmd/lvm/util/Makefile.com +++ /dev/null @@ -1,158 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# Architecture independent makefile for svm utilities -# -# cmd/lvm/util/Makefile.com -# - -# programs that are installed in /usr/sbin -PROG= \ - medstat \ - metaclear \ - metadetach \ - metahs \ - metaoffline \ - metaonline \ - metaparam \ - metarename \ - metareplace \ - metaroot \ - metaset \ - metasync \ - metattach \ - metaimport - -# programs that are installed in /sbin, with links from /usr/sbin -ROOTFS_PROG = \ - metadb \ - metadevadm \ - metainit \ - metarecover \ - metastat - -# programs that are installed in /usr/lib/lvm -METACLUST= metaclust - -OBJECTS = \ - medstat.o \ - metaclear.o \ - metadb.o \ - metadetach.o \ - metadevadm.o \ - metahs.o \ - metainit.o \ - metaoffline.o \ - metaonline.o \ - metaparam.o \ - metarecover.o \ - metarename.o \ - metareplace.o \ - metaroot.o \ - metaset.o \ - metastat.o \ - metasync.o \ - metattach.o \ - metaclust.o \ - metaimport.o - -SRCS= $(OBJECTS:%.o=../%.c) - -include ../../../Makefile.cmd -include ../../Makefile.lvm - -ROOTLIBSVM = $(ROOTLIB)/lvm - -CLOBBERFILES += $(ROOTFS_PROG) $(METACLUST) - -ROOTUSRSBINPROG = $(PROG:%=$(ROOTUSRSBIN)/%) - -ROOTSBINPROG = $(ROOTFS_PROG:%=$(ROOTSBIN)/%) - -ROOTUSRSBINLINKS = $(ROOTFS_PROG:%=$(ROOTUSRSBIN)/%) - -POFILE= utilp.po -DEFINES += -DDEBUG -CPPFLAGS += $(DEFINES) - -metainit := CPPFLAGS += -I$(SRC)/lib/lvm/libmeta/common/hdrs -metaset := LDFLAGS += -ldevid - -LDLIBS += -lmeta - -lint := LINTFLAGS += -m - -install := TARGET = install -clean := TARGET = clean - -.KEEP_STATE: - -%.o: ../%.c - $(COMPILE.c) $< - $(POST_PROCESS_O) - -all: $(PROG) $(METACLUST) $(ROOTFS_PROG) - -catalog: $(POFILE) - -$(PROG) $(ROOTFS_PROG): $$(@).o - $(LINK.c) -o $@ $(@).o $(LDLIBS) - $(POST_PROCESS) - -$(METACLUST): $$(@).o - $(LINK.c) -o $@ $(@).o $(LDLIBS) - $(POST_PROCESS) - - -install: all .WAIT $(ROOTLIBSVM) $(ROOTUSRSBINPROG) $(ROOTSBINPROG) $(ROOTUSRSBINLINKS) $(ROOTLIBSVM)/$(METACLUST) - -$(ROOTUSRSBINLINKS): - -$(RM) $@; $(SYMLINK) ../../sbin/$(@F) $@ - -cstyle: - $(CSTYLE) $(SRCS) - -lint: - for f in $(SRCS) ; do \ - if [ $$f = "../metainit.c" ]; then \ - $(LINT.c) $(LINTFLAGS) \ - -I$(SRC)/lib/lvm/libmeta/common/hdrs $$f ; \ - else \ - $(LINT.c) $(LINTFLAGS) $$f ; \ - fi \ - done - -clean: - $(RM) $(OBJECTS) $(PROG) - -include ../../../Makefile.targ - -${ROOTLIBSVM}/%: % - ${INS.file} - -${ROOTLIBSVM}: - ${INS.dir} - diff --git a/usr/src/cmd/lvm/util/i386/Makefile b/usr/src/cmd/lvm/util/i386/Makefile deleted file mode 100644 index 34f6628dfac7..000000000000 --- a/usr/src/cmd/lvm/util/i386/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996-2002 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Makefile for logical volume management -# -# cmd/lvm/util/i386/Makefile - -include ../Makefile.com diff --git a/usr/src/cmd/lvm/util/medstat.c b/usr/src/cmd/lvm/util/medstat.c deleted file mode 100644 index 841b2e458070..000000000000 --- a/usr/src/cmd/lvm/util/medstat.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * mediator status utility. - */ - -#include -#include - -static void -usage( - mdsetname_t *sp, - char *string) -{ - if ((string != NULL) && (*string != '\0')) - md_eprintf("%s\n", string); - (void) fprintf(stderr, gettext( - "usage: %s [-q] -s setname\n"), - myname); - md_exit(sp, (string == NULL) ? 0 : 1); -} - -/* - * parse args and do it - */ -int -main( - int argc, - char *argv[] -) -{ - int c; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdsetname_t *sp = NULL; - md_set_desc *sd; - int i; - md_h_t mdh; - med_data_t medd; - int medok = 0; - int golden = 0; - int verbose = 1; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - /* - * There is no need to proxy the command to owner of the set - * to get the mediator information as the /etc/lvm/meddb file - * contains the required information and so it can be used. - */ - if ((sdssc_bind_library() == SDSSC_ERROR)) { - (void) fprintf(stderr, - "Failed to initialised libscsds.so.1\n"); - exit(1); - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "qs:?")) != -1) { - switch (c) { - case 'q': - verbose = 0; - break; - case 's': - sname = optarg; - break; - case '?': - if (optopt == '?') - usage(sp, NULL); - /*FALLTHROUGH*/ - default: - usage(sp, gettext("unknown command")); - } - } - - /* must have set for everything else */ - if (strcmp(sname, MD_LOCAL_NAME) == 0) - usage(sp, gettext("setname must be specified")); - - /* snarf MDDB */ - if (meta_setup_db_locations(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if ((sp = metasetname(sname, ep)) != NULL) { - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (sd->sd_med.n_cnt == 0) { - if (verbose) - (void) printf(gettext("No mediator hosts" - "configured for set \"%s\".\n"), sname); - md_exit(sp, 2); - } - - if (verbose) - (void) printf("%8.8s\t\t%6.6s\t" - "%6.6s\n", gettext("Mediator"), - gettext("Status"), gettext("Golden")); - - for (i = 0; i < MED_MAX_HOSTS; i++) { - - if (sd->sd_med.n_lst[i].a_cnt == 0) - continue; - - (void) memset(&medd, '\0', sizeof (medd)); - (void) memset(&mdh, '\0', sizeof (mdh)); - mdh = sd->sd_med.n_lst[i]; - - if (verbose) - (void) printf("%-17.17s\t", - sd->sd_med.n_lst[i].a_nm[0]); - - if (clnt_med_get_data(&mdh, sp, &medd, ep) == -1) { - if (mdanyrpcerror(ep)) { - if (verbose) - (void) printf("%s\n", - gettext("Unreachable")); - continue; - } else if (mdiserror(ep, MDE_MED_ERROR)) { - if (verbose) - (void) printf("%s\n", - gettext("Bad")); - } else { - if (verbose) - (void) printf("%s\n", - gettext("Fatal")); - } - mde_perror(ep, ""); - if (mdiserror(ep, MDE_MED_ERROR)) - continue; - md_exit(sp, 1); - } - - if (verbose) - (void) printf("%s", gettext("Ok")); - - if (medd.med_dat_fl & MED_DFL_GOLDEN) { - if (verbose) - (void) printf("\t%s", - gettext("Yes")); - golden++; - } else { - if (verbose) - (void) printf("\t%s", gettext("No")); - } - - if (verbose) - (void) printf("\n"); - - medok++; - } - - if (golden) - md_exit(sp, 0); - - if (medok < ((sd->sd_med.n_cnt / 2) + 1)) - md_exit(sp, 1); - - md_exit(sp, 0); - } - - /* - * Print the mediator status using /etc/lvm/meddb if host is not - * part of metaset but part of mediators. - */ - - if (meta_mediator_info_from_file(sname, verbose, ep)) { - md_exit(sp, 1); - } - - md_exit(sp, 0); - /* NOTREACHED */ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaclear.c b/usr/src/cmd/lvm/util/metaclear.c deleted file mode 100644 index ad4f1e05818e..000000000000 --- a/usr/src/cmd/lvm/util/metaclear.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * clear metadevices - */ - -#include -#include - - -/* - * clear metadevice or hotspare pool - */ -static int -clear_name( - mdsetname_t **spp, - char *uname, - mdcmdopts_t options, - md_error_t *ep -) -{ - - /* clear hotspare pool */ - if (is_existing_hsp(*spp, uname)) { - mdhspname_t *hspnp; - - /* get hotspare pool name */ - if ((hspnp = metahspname(spp, uname, ep)) == NULL) - return (-1); - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* clear hotspare pool */ - return (meta_hsp_reset(*spp, hspnp, options, ep)); - } - - /* clear metadevice */ - else { - mdname_t *np; - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* get metadevice name */ - if (((np = metaname(spp, uname, META_DEVICE, ep)) == NULL) || - (metachkmeta(np, ep) != 0)) { - return (-1); - } - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* clear metadevice */ - return (meta_reset_by_name(*spp, np, options, ep)); - } -} - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] -a\n\ - %s [-s setname] [options] metadevice...\n\ -options:\n\ --f force clear\n\ --r recursive clear\n\ --p clear all soft partitions on metadevice/component\n"), myname, myname); - md_exit(sp, eval); -} - -/* - * mainline. crack command line arguments. - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = MD_LOCAL_NAME; - mdsetname_t *sp = NULL; - int aflag = 0; - int pflag = 0; - int set_flag = 0; - mdcmdopts_t options = (MDCMD_PRINT|MDCMD_DOIT); - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int eval = 1; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - int mnset = FALSE; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) - goto errout; - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:afrp?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - set_flag++; - break; - - case 'a': - ++aflag; - options |= MDCMD_FORCE; - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - case 'r': - options |= MDCMD_RECURSE | MDCMD_FORCE; - break; - case 'p': - ++pflag; - break; - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - - /* with mn sets if -a, set name must have been specified by -s */ - if (called_thru_rpc && aflag && !set_flag) { - md_eprintf(gettext( - "-a parameter requires the use of -s in multi-node sets")); - md_exit(sp, 1); - } - - /* get set context */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (called_thru_rpc) { - /* Check if the device is open on all nodes */ - options |= MDCMD_MN_OPEN_CHECK; - } - - if (aflag) { /* clear all devices */ - if (argc != 0) - usage(sp, 1); - - /* - * If a MN set, we will generate a series of individual - * metaclear commands which will each grab the set lock. - * Therefore do not grab the set lock now. - */ - - if (!meta_is_mn_set(sp, ep)) { - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) - goto errout; - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) - goto errout; - } else { - mnset = TRUE; - } - - /* reset all devices in set */ - if (meta_reset_all(sp, options, ep) != 0) { - if (!mnset) - mde_perror(ep, ""); - } else - eval = 0; - } else { - /* - * We are dealing with either a single or multiple names. - * The set for the command is either denoted by the -s option - * or the set of the first name. - */ - if (argc <= 0) - usage(sp, 1); - - if (meta_is_mn_name(&sp, argv[0], ep)) - mnset = TRUE; - eval = 0; - - for (; (argc > 0); --argc, ++argv) { - char *cname; - - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - */ - if (!called_thru_rpc && mnset) { - /* get the canonical name */ - if (pflag) { - /* - * If -p, set cname to the device - * argument. - */ - cname = Strdup(argv[0]); - } else { - /* - * For hotspares and metadevices, set - * cname to the full name, - * setname/hspxxx or setname/dxxx - */ - cname = meta_name_getname(&sp, - argv[0], META_DEVICE, ep); - if (cname == NULL) { - mde_perror(ep, ""); - eval = 1; - continue; - } - } - if (meta_mn_send_metaclear_command(sp, - cname, options, pflag, ep) != 0) { - eval = 1; - } - Free(cname); - } else { - if (pflag) { - /* - * clear all soft partitions on named - * devices - */ - if (meta_sp_reset_component(sp, argv[0], - options, ep) != 0) { - mde_perror(ep, ""); - eval = 1; - continue; - } - } else { - /* - * get the canonical name and - * setup sp if it has been - * specified as part of the - * metadevice/hsp name param - */ - cname = meta_name_getname(&sp, - argv[0], META_DEVICE, ep); - if (cname == NULL) { - mde_perror(ep, ""); - eval = 1; - continue; - } - - /* clear named devices */ - if (clear_name(&sp, cname, - options, ep) != 0) { - mde_perror(ep, ""); - eval = 1; - Free(cname); - continue; - } - Free(cname); - } - } - } - } - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) { - mde_perror(ep, ""); - eval = 1; - } - md_exit(sp, eval); - -errout: - mde_perror(ep, ""); - md_exit(sp, eval); - /*NOTREACHED*/ - return (eval); -} diff --git a/usr/src/cmd/lvm/util/metaclust.c b/usr/src/cmd/lvm/util/metaclust.c deleted file mode 100644 index 497cfc53e897..000000000000 --- a/usr/src/cmd/lvm/util/metaclust.c +++ /dev/null @@ -1,2012 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define MY_VERSION "1.0" /* the highest supported version */ -#define MAX_DEBUG_LEVEL 5 /* maximum verbosity level */ - -#define RESET_OWNER 0x0001 -#define CHOOSE_OWNER 0x0002 -#define RESET_ABR 0x0004 -#define UPDATE_ABR 0x0008 -#define GET_MIRROR_STATE 0x0010 - -#define SET_INFO_NO_WR 0x0002 -#define SET_INFO_MN 0x0004 - -/* - * This table defines all the metaclust reconfig steps we understand - */ -typedef enum stpnum { - MC_UNK = 0, - MC_START, - MC_STOP, - MC_ABORT, - MC_RETURN, - MC_STEP1, - MC_STEP2, - MC_STEP3, - MC_STEP4 -} stepnum_t; - -/* - * Structure for step_name -> step_number mapping - */ -struct step_t { - char *step_nam; - stepnum_t step_num; -}; - -/* - * Step name to step number mapping table - * This table MUST be sorted alphabetically in ascending order of step name - */ -static struct step_t step_table[] = { - { "abort", MC_ABORT }, - { "return", MC_RETURN }, - { "start", MC_START }, - { "step1", MC_STEP1 }, - { "step2", MC_STEP2 }, - { "step3", MC_STEP3 }, - { "step4", MC_STEP4 }, - { "stop", MC_STOP } -}; - -/* - * If support for a different version is added, the new version number should - * be appended to the version_table below. This list will be searched to - * determine if a version requested via the -V option is supported or not. - */ -static char *version_table[] = { - MY_VERSION -}; - -uint_t timeout = 0; /* disable timeout by default */ -char *version = MY_VERSION; /* use latest version by default */ -int stepnum = MC_UNK; /* reconfiguration step number */ -pid_t c_pid; /* child process id */ - -/* - * Binary search comparison routine - */ -static int -mc_compare(const void *stp1, const void *stp2) -{ - return (strcmp((const char *)stp1, - ((const struct step_t *)stp2)->step_nam)); -} - -/* - * Timeout expiry alarm signal handler - */ -/*ARGSUSED*/ -static void -sigalarmhandler(int sig) -{ - int i, n, ret, stat_loc = 0; - FILE *pgcore; - char corecmd[256]; - - n = sizeof (step_table) / sizeof (step_table[0]); - for (i = 0; i < n; i++) { - if (stepnum == step_table[i].step_num) - break; - } - - assert(i != n); - - meta_mc_log(MC_LOG1, gettext("Timeout expired in %s: %s"), - step_table[i].step_nam, - meta_print_hrtime(gethrtime() - start_time)); - - /* - * See what the child was actually doing when the timeout expired. - * A core-dump of this would be _really_ good, so let's just - * try a 'gcore -g c_pid' and hope - */ - - (void) memset(corecmd, 0, sizeof (corecmd)); - (void) snprintf(corecmd, sizeof (corecmd), - "/bin/gcore -g %d >/dev/null 2>&1", (int)c_pid); - - pgcore = popen(corecmd, "r"); - - if (pgcore == NULL) { - meta_mc_log(MC_LOG1, gettext("Could not grab core for pid %s"), - c_pid); - } else { - (void) pclose(pgcore); - } - - if ((ret = kill(c_pid, SIGKILL)) == 0) { - /* - * The child will wait forever until the status is retrieved - * so get it now. Keep retrying if the call is interrupted. - * - * The possible results are, - * - * - child killed successfully - * - signal sent but child not killed - * - waitpid failed/interrupted - */ - (void) sleep(2); - while ((ret = waitpid(c_pid, &stat_loc, WNOHANG)) < 0) { - if (errno != EINTR) { - break; - } - } - if ((ret == c_pid) || (errno == ECHILD)) { - ret = 0; - } else { - ret = 1; - } - } else if (errno == ESRCH) { - /* - * If the kill did not catch the child then it means the child - * exited immediately after the timeout occured. - */ - ret = 0; - } - - /* - * make sure not to exit with 205 for any steps other than step1-step4. - * Suncluster reconfiguration can't handle it otherwise. - */ - switch (stepnum) { - case MC_STEP1: - case MC_STEP2: - case MC_STEP3: - case MC_STEP4: - /* - * If the child was killed successfully return 205 for a - * new reconfig cycle otherwise send 1 to panic the node. - */ - if (ret != 0) { - md_eprintf(gettext("Could not kill child\n")); - exit(1); - } else { - exit(205); - } - break; - case MC_START: - case MC_STOP: - case MC_ABORT: - case MC_RETURN: - default: - exit(1); - break; - } -} - -/* - * Attempt to load local set. - * Returns: - * pointer to mdsetname_t for local set (local_sp) is successful. - * 0 if failure - * if there are no local set mddbs, no error message is printed. - * Otherwise, error message is printed so that user - * can determine why the local set didn't start. - */ -mdsetname_t * -load_local_set(md_error_t *ep) -{ - mdsetname_t *local_sp = NULL; - - /* Does local set exist? If not, give no error */ - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - return (0); - } - - /* - * snarf local set - * If fails with MDE_DB_NODB, then just return 1 printing - * no failure. - * Otherwise, print error message, and return 1. - */ - if (meta_setup_db_locations(ep) != 0) { - if (!(mdismddberror(ep, MDE_DB_NODB))) - mde_perror(ep, ""); - return (0); - } - - /* local set loaded successfully */ - return (local_sp); -} - -/* - * Purpose: Compose a full path name for a metadevice - * - * On entry: sp - setname pointer - * mnum - minor number of metadevice - * pathname - pointer to array to return path string - * pathlen - max length of pathname array - */ -static int -compose_path(mdsetname_t *sp, int mnum, char *pathname, int pathlen) -{ - int rtn; - mdname_t *np; - md_error_t status = mdnullerror; - - if (MD_MIN2SET(mnum) != sp->setno) { - md_eprintf(gettext("minor number 0x%x invalid for set %d\n"), - mnum, sp->setno); - return (-1); - } - - if ((np = metamnumname(&sp, mnum, 0, &status)) == NULL) { - return (-1); - } - - rtn = snprintf(pathname, pathlen, "%s", np->rname); - - if ((pathname[0] == '\0') || (rtn >= pathlen)) { - md_eprintf(gettext( - "Could not create path for device %s\n"), - get_mdname(sp, mnum)); - return (-1); - } - return (0); -} - -/* - * Purpose: Walk through all the devices specified for the given set - * and do the action specified in mode - */ -static int -reset_state(uint_t mode, mdsetname_t *sp, char *drivername, md_error_t *ep) -{ - mdnamelist_t *devnlp = NULL; - mdnamelist_t *p; - mdname_t *devnp = NULL; - md_set_mmown_params_t ownpar_p; - md_set_mmown_params_t *ownpar = &ownpar_p; - md_unit_t *mm; - int mirror_dev = 0; - mndiskset_membershiplist_t *nl; - int cnt; - int has_parent; - md_mn_get_mir_state_t mir_state_p; - md_mn_get_mir_state_t *mir_state = &mir_state_p; - - /* - * if we are choosing or resetting the owners then make sure - * we are only doing it for mirror devices - */ - mirror_dev = (strcmp(MD_MIRROR, drivername) == 0); - if ((mode & (RESET_OWNER | CHOOSE_OWNER)) && !mirror_dev) { - return (-1); - } - - /* get a list of all the metadevices for current set */ - if (mirror_dev && meta_get_mirror_names(sp, &devnlp, 0, ep) < 0) { - mde_perror(ep, gettext("Could not get mirrors for set %s"), - sp->setname); - return (-1); - } else if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) { - mde_perror(ep, gettext( - "Could not get soft partitions for set %s"), sp->setname); - return (-1); - } - - /* If resetting the owner, get the known membership list */ - if (mode & RESET_OWNER) { - if (meta_read_nodelist(&cnt, &nl, ep)) { - mde_perror(ep, "Could not get nodelist"); - return (-1); - } - } - - /* for each metadevice */ - for (p = devnlp; (p != NULL); p = p->next) { - devnp = p->namep; - - /* - * Get the current setting for mirror ABR state and all of the - * submirror state and flags from the master node. We only - * perform this when going through a 'start' cycle. - */ - if ((mode & GET_MIRROR_STATE) && mirror_dev) { - char *miscname; - - /* - * Ensure that we ignore soft-parts that are returned - * from the meta_get_mirror_names() call - */ - if ((miscname = metagetmiscname(devnp, ep)) == NULL) - goto out; - if (strcmp(miscname, MD_MIRROR) != 0) - continue; - - mir_state->mnum = meta_getminor(devnp->dev); - MD_SETDRIVERNAME(mir_state, MD_MIRROR, sp->setno); - meta_mc_log(MC_LOG4, gettext("Getting mirror state" - " for %s: %s"), get_mdname(sp, mir_state->mnum), - meta_print_hrtime(gethrtime() - start_time)); - - if (metaioctl(MD_MN_GET_MIRROR_STATE, mir_state, ep, - "MD_MN_GET_MIRROR_STATE") != 0) { - mde_perror(ep, gettext("Unable to get " - "mirror state for %s"), - get_mdname(sp, mir_state->mnum)); - goto out; - } else { - continue; - } - } - - /* check if this is a top level metadevice */ - if ((mm = meta_get_mdunit(sp, devnp, ep)) == NULL) - goto out; - if (MD_HAS_PARENT(MD_PARENT(mm))) { - has_parent = 1; - } else { - has_parent = 0; - } - Free(mm); - - if (mode & (RESET_OWNER | CHOOSE_OWNER)) { - char *miscname; - - /* - * we can only do these for mirrors so make sure we - * really have a mirror device and not a softpartition - * imitating one. meta_get_mirror_names seems to think - * softparts on top of a mirror are mirrors! - */ - if ((miscname = metagetmiscname(devnp, ep)) == NULL) - goto out; - if (strcmp(miscname, MD_MIRROR) != 0) - continue; - - (void) memset(ownpar, 0, sizeof (*ownpar)); - ownpar->d.mnum = meta_getminor(devnp->dev); - MD_SETDRIVERNAME(ownpar, MD_MIRROR, sp->setno); - - meta_mc_log(MC_LOG4, gettext("Setting owner " - "for %s: %s"), get_mdname(sp, ownpar->d.mnum), - meta_print_hrtime(gethrtime() - start_time)); - - /* get the current owner id */ - if (metaioctl(MD_MN_GET_MM_OWNER, ownpar, ep, - "MD_MN_GET_MM_OWNER") != 0) { - mde_perror(ep, gettext("Unable to get " - "mirror owner for %s"), - get_mdname(sp, ownpar->d.mnum)); - goto out; - } - } - - if (mode & RESET_OWNER) { - if (ownpar->d.owner == MD_MN_MIRROR_UNOWNED) { - mdclrerror(ep); - continue; - } - - /* - * reset owner only if the current owner is - * not in the membership list - * Also kill the resync thread so that when the resync - * is started, it will perform an optimized resync - * for any resync regions that were dirty when the - * current owner left the membership. - */ - if (meta_is_member(NULL, ownpar->d.owner, nl) != 1) { - if (meta_mn_change_owner(&ownpar, - sp->setno, ownpar->d.mnum, - MD_MN_MIRROR_UNOWNED, - MD_MN_MM_ALLOW_CHANGE) == -1) { - md_eprintf(gettext( - "Unable to reset mirror owner " - "for %s\n"), - get_mdname(sp, ownpar->d.mnum)); - goto out; - } - if (meta_mirror_resync(sp, devnp, 0, ep, - MD_RESYNC_KILL_NO_WAIT) != 0) { - md_eprintf(gettext( - "Unable to kill resync for" - " %s\n"), - get_mdname(sp, ownpar->d.mnum)); - goto out; - } - } - } - - if (mode & CHOOSE_OWNER) { - /* - * only orphaned resyncs will have no owner. - * if that is the case choose a new owner. Otherwise - * re-establish the existing owner. This covers the - * case where a node that owned the mirror - * reboots/panics and comes back into the cluster before - * the reconfig cycle has completed. In this case the - * other cluster nodes will have the mirror owner marked - * as the rebooted node while it has the owner marked - * as 'None'. We have to reestablish the ownership so - * that the subsequent resync can continue. - */ - if (meta_mn_change_owner(&ownpar, sp->setno, - ownpar->d.mnum, ownpar->d.owner, - MD_MN_MM_CHOOSE_OWNER) == -1) { - md_eprintf(gettext("Unable to choose " - "mirror owner for %s\n"), - get_mdname(sp, ownpar->d.mnum)); - goto out; - } - } - - /* - * For RESET_ABR and UPDATE_ABR - only handle top - * level metadevices. - */ - if (has_parent) - continue; - - if (mode & RESET_ABR) { - /* - * Reset the ABR (application based recovery) - * value on all nodes. We are dealing with - * the possibility that we have ABR set but the - * only node that had the device open with ABR has - * left the cluster. We simply open and close the - * device and if this is the last close in the - * cluster, ABR will be cleared on all nodes. - */ - char *miscname; - char name[MAXPATHLEN]; - int mnum, fd; - - name[0] = '\0'; - mnum = meta_getminor(devnp->dev); - - /* - * Ensure that we don't include soft-parts in the - * mirror-only call to RESET_ABR. meta_get_mirror_names - * returns a bogus list that includes all soft-parts - * built on mirrors. - */ - if ((miscname = metagetmiscname(devnp, ep)) == NULL) - goto out; - if (mirror_dev && (strcmp(miscname, MD_MIRROR) != 0)) - continue; - - meta_mc_log(MC_LOG4, gettext("Re-setting ABR state " - "for %s: %s"), get_mdname(sp, mnum), - meta_print_hrtime(gethrtime() - start_time)); - - /* compose the absolute device path and open it */ - if (compose_path(sp, mnum, &name[0], - sizeof (name)) != 0) - goto out; - if ((fd = open(name, O_RDWR, 0)) < 0) { - md_perror(gettext("Could not open device %s"), - name); - continue; - } - - (void) close(fd); - } - - if (mode & UPDATE_ABR) { - /* - * Update the ABR value on this node. We obtain the - * current ABR state from the master node. - */ - - char *miscname; - char name[MAXPATHLEN]; - int mnum, fd; - volcap_t vc; - uint_t tstate; - - name[0] = '\0'; - mnum = meta_getminor(devnp->dev); - - /* - * Ensure that we don't include soft-parts in the - * mirror-only call to UPDATE_ABR. meta_get_mirror_names - * returns a bogus list that includes all soft-parts - * built on mirrors. - */ - if ((miscname = metagetmiscname(devnp, ep)) == NULL) - goto out; - if (mirror_dev && (strcmp(miscname, MD_MIRROR) != 0)) - continue; - - /* Get tstate from Master */ - if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) - != 0) - continue; - /* If not set on the master, nothing to do */ - if (!(tstate & MD_ABR_CAP)) - continue; - - meta_mc_log(MC_LOG4, gettext("Updating ABR state " - "for %s: %s"), get_mdname(sp, mnum), - meta_print_hrtime(gethrtime() - start_time)); - - /* compose the absolute device path and open it */ - if (compose_path(sp, mnum, &name[0], - sizeof (name)) != 0) - goto out; - if ((fd = open(name, O_RDWR, 0)) < 0) { - md_perror(gettext("Could not open device %s"), - name); - continue; - } - - /* set ABR state */ - vc.vc_info = 0; - vc.vc_set = 0; - if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) { - /* - * Ignore if device does not support this - * ioctl - */ - if ((errno != ENOTTY) && (errno != ENOTSUP)) { - md_perror(gettext("Could not get " - "ABR/DMR state for device %s"), - name); - } - (void) close(fd); - continue; - } - if (!(vc.vc_info & (DKV_ABR_CAP | DKV_DMR_CAP))) { - (void) close(fd); - continue; - } - - vc.vc_set = DKV_ABR_CAP; - if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) { - md_perror(gettext( - "Could not set ABR state for " - "device %s"), name); - (void) close(fd); - goto out; - } else { - md_eprintf(gettext( - "Setting ABR state on device %s\n"), name); - } - - (void) close(fd); - } - } - - /* cleanup */ - if (mode & RESET_OWNER) { - meta_free_nodelist(nl); - } - metafreenamelist(devnlp); - return (0); - -out: - /* cleanup */ - if (mode & RESET_OWNER) { - meta_free_nodelist(nl); - } - metafreenamelist(devnlp); - return (-1); -} - -/* - * Print usage message - */ -static void -usage(mdsetname_t *sp, int eval) -{ - (void) fprintf(stderr, gettext("usage:" - "\t%s [-V version] [-t timeout] [-d level] start localnodeid\n" - "\t%s [-V version] [-t timeout] [-d level] step nodelist...\n" - "\t%s [-V version] [-t timeout] [-d level] abort | stop\n" - "\t%s [-V | -? | -h]\n"), - myname, myname, myname, myname); - if (!eval) { - (void) fprintf(stderr, gettext("\n" - "\tValid debug (-d) levels are 1-%d for increasing " - "verbosity.\n\tDefault is -d 3.\n\n" - "\tValid step values are: return | step1 | step2 | " - "step3 | step4\n\n" - "\tNodelist is a space-separated list of node id's\n\n"), - MAX_DEBUG_LEVEL); - } - md_exit(sp, eval); -} - -/* - * Input: Input takes a config step name followed by a list of - * possible node id's. - * - * Returns: 0 - Success - * 1 - Fail - * Node will be removed from cluster membership - * by forcing node to panic. - * 205 - Unsuccessful. Start another reconfig cycle. - * Problem was encountered that could be fixed by - * running another reconfig cycle. - * Problem could be a result of a failure to read - * the nodelist file or that all work could not be - * accomplished in a reconfig step in the amount of - * time given so another reconfig cycle is needed in - * order to finish the current step. - */ -int -main(int argc, char **argv) -{ - mdsetname_t *sp = NULL; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - set_t max_sets, setno; - int c, clust = 0; - struct sigaction nsa, osa; - struct step_t *step_ptr; - mdsetname_t *local_sp = NULL; - md_drive_desc *dd; - int rval = 0; - md_set_desc *sd; - mddb_block_parm_t mbp; - uint_t debug = 3; /* log upto MC_LOG3 by default */ - int version_table_size; - mddb_setflags_config_t sf; - int ret_val; - mddb_config_t cfg; - int set_info[MD_MAXSETS]; - long commd_timeout = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((clust = sdssc_bind_library()) == SDSSC_ERROR) { - md_eprintf(gettext("Interface error with libsds_sc.so\n")); - exit(1); - } - - if (md_init(argc, argv, 1, 1, ep) != 0 || meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* - * open log and enable libmeta logging. Do it here explicitly - * rather than letting md_init() do it because we are not really - * a daemon and that is what md_init() opens the log as. - */ - openlog("metaclust", LOG_CONS, LOG_USER); - - version_table_size = sizeof (version_table) / sizeof (version_table[0]); - - optind = 1; - opterr = 0; - while ((c = getopt(argc, argv, "hd:V:t:?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 'd': - if (sscanf(optarg, "%u", &debug) != 1) { - md_eprintf(gettext("Invalid debug level\n")); - md_exit(sp, 1); - } else if ((debug < 1) || (debug > MAX_DEBUG_LEVEL)) { - debug = min(max(debug, 1), MAX_DEBUG_LEVEL); - md_eprintf(gettext("Debug level must be " - "between 1 and %d inclusive.\n"), - MAX_DEBUG_LEVEL); - md_eprintf(gettext("Debug level set to %d.\n"), - debug); - } - break; - - case 'V': - version = Strdup(optarg); - break; - - case 't': - if (sscanf(optarg, "%u", &timeout) != 1) { - md_eprintf(gettext("Invalid timeout value\n")); - md_exit(sp, 1); - } - break; - - case '?': - if (optopt == '?') { - usage(sp, 0); - } else if (optopt == 'V') { - int i; - - (void) fprintf(stdout, gettext( - "%s: Versions Supported:"), myname); - for (i = 0; i < version_table_size; i++) { - (void) fprintf(stdout, " %s", - version_table[i]); - } - (void) fprintf(stdout, "\n"); - md_exit(sp, 0); - } - /*FALLTHROUGH*/ - - default: - usage(sp, 1); - break; - } - } - - /* initialise the debug level and start time */ - setup_mc_log(debug); - - /* - * check that the version specified (if any) is supported. - */ - if (version != NULL) { - int i, found = 0; - - for (i = 0; i < version_table_size; i++) { - if (strcmp(version, version_table[i]) == 0) { - found = 1; - break; - } - } - if (!found) { - md_eprintf(gettext("Version %s not supported\n"), - version); - md_exit(sp, 1); - } - } - - argc -= optind; - argv += optind; - - /* parse arguments */ - if (argc <= 0) { - usage(sp, 1); - } - - /* convert the step name to the corresponding number */ - step_ptr = bsearch(argv[0], step_table, (sizeof (step_table) / - sizeof (step_table[0])), sizeof (step_table[0]), mc_compare); - if (step_ptr != NULL) { - stepnum = step_ptr->step_num; - } - - --argc; - ++argv; - - /* set timeout alarm signal, a value of 0 will disable timeout */ - if (timeout > 0) { - int stat_loc = 0; - commd_timeout = (long)(timeout * .75); - - c_pid = fork(); - - if (c_pid == (pid_t)-1) { - md_perror(gettext("Unable to fork")); - md_exit(sp, 1); - } else if (c_pid) { - /* parent */ - nsa.sa_flags = 0; - if (sigfillset(&nsa.sa_mask) < 0) { - md_perror(gettext("Unable to set signal mask")); - md_exit(sp, 1); - } - - nsa.sa_handler = sigalarmhandler; - if (sigaction(SIGALRM, &nsa, &osa) == -1) { - md_perror(gettext("Unable to set alarm " - "handler")); - md_exit(sp, 1); - } - - (void) alarm(timeout); - - /* - * wait for child to exit or timeout to expire. - * keep retrying if the call is interrupted - */ - while ((ret_val = waitpid(c_pid, &stat_loc, 0)) < 0) { - if (errno != EINTR) { - break; - } - } - if (ret_val == c_pid) { - /* exit with the childs exit value */ - exit(WEXITSTATUS(stat_loc)); - } else if (errno == ECHILD) { - md_exit(sp, 0); - } else { - perror(myname); - md_exit(sp, 1); - } - } - } - - /* - * If a timeout value is given, everything from this point onwards is - * executed in the child process. - */ - - switch (stepnum) { - case MC_START: - /* - * Start Step - * - * - Suspend all rpc.mdcommd messages - */ - - /* expect the local node id to be given only */ - if (argc != 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Start step: %s"), - meta_print_hrtime(0)); - - /* - * With multinode disksets configured we need to - * update all replicas on all cluster nodes to have - * the same status. If local replicas on a cluster - * node are not accessible we need to panic this - * node, otherwise we abort in the reconfig cycle - * and failfast/reboot the "good" cluster node too. - * To avoid a total cluster outage in the above case - * we panic only the failing node via md_exit(.., 1). - */ - if ((local_sp = load_local_set(ep)) == NULL) { - /* panic the node */ - md_exit(local_sp, 1); - } - - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext("Unable to " - "get set %d information"), setno); - md_exit(sp, 1); - } - } - - /* only check multi-node disksets */ - if (!meta_is_mn_set(sp, ep)) { - mdclrerror(ep); - continue; - } - - meta_mc_log(MC_LOG3, gettext("Start - block parse " - "messages for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* - * Mddb parse messages are sent amongst the nodes - * in a diskset whenever the locator block or - * locator names structure has been changed. - * A locator block change could occur as a result - * of a disk failure during the reconfig cycle, - * so block the mddb parse messages while the - * rpc.mdcommd is suspended during the reconfig cycle. - */ - if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) { - (void) memset(&mbp, 0, sizeof (mbp)); - mbp.c_setno = setno; - mbp.c_blk_flags = MDDB_BLOCK_PARSE; - if (metaioctl(MD_MN_MDDB_BLOCK, &mbp, - &mbp.c_mde, NULL)) { - (void) mdstealerror(ep, &mbp.c_mde); - mde_perror(ep, gettext("Could not " - "block set %s"), sp->setname); - md_exit(sp, 1); - } - } - - /* suspend commd and spin waiting for drain */ - while ((ret_val = mdmn_suspend(setno, - MD_COMM_ALL_CLASSES, commd_timeout)) == - MDE_DS_COMMDCTL_SUSPEND_NYD) { - (void) sleep(1); - } - - if (ret_val) { - md_eprintf(gettext("Could not suspend " - "rpc.mdcommd for set %s\n"), sp->setname); - md_exit(sp, 1); - } - - /* - * Set start step flag for set. This is set to indicate - * that this node entered the reconfig cycle through - * the start step. This is used during the reconfig - * cycle to determine whether the node had entered - * through the start step or the return step. - */ - (void) memset(&sf, 0, sizeof (sf)); - sf.sf_setno = sp->setno; - sf.sf_setflags = MD_SET_MN_START_RC; - sf.sf_flags = MDDB_NM_SET; - /* Use magic to help protect ioctl against attack. */ - sf.sf_magic = MDDB_SETFLAGS_MAGIC; - if (metaioctl(MD_MN_SET_SETFLAGS, &sf, - &sf.sf_mde, NULL)) { - (void) mdstealerror(ep, &sf.sf_mde); - mde_perror(ep, gettext("Could not set " - "start_step flag for set %s"), sp->setname); - md_exit(sp, 1); - } - - } - - meta_mc_log(MC_LOG2, gettext("Start step completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - case MC_STOP: - /* - * Stop Step - * - * - ??? - */ - - /* don't expect any more arguments to follow the step name */ - if (argc != 0) - usage(sp, 1); - - break; - - case MC_ABORT: - /* - * Abort Step - * - * - Abort rpc.mdcommd - */ - - /* don't expect any more arguments to follow the step name */ - if (argc != 0) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Abort step: %s"), - meta_print_hrtime(0)); - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - /* - * abort the rpc.mdcommd. The abort is only issued on this node - * meaning that the abort reconfig step is called on this - * node before a panic while the rest of the cluster will - * undergo a reconfig cycle. - * There is no time relation between this node running a - * reconfig abort and the the rest of the cluster - * running a reconfig cycle meaning that this node may - * panic before, during or after the cluster has run - * a reconfig cycle. - */ - mdmn_abort(); - - meta_mc_log(MC_LOG2, gettext("Abort step completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - case MC_RETURN: - /* - * Return Step - * - * - Grab local set lock, issue rpc.mdcommd DRAIN ALL - * and release local set lock. Grabbing the local set - * lock allows any active metaset/metadb commands to - * terminate gracefully and will keep a metaset/metadb - * command from starting until the DRAIN ALL is issued. - * The metaset/metadb commands can issue - * DRAIN ALL/RESUME ALL commands to rpc.mdcommd, - * so the return step must not issue the DRAIN ALL command - * until metaset/metadb have finished or metaset may issue - * a RESUME ALL after this return reconfig step has issued - * the DRAIN ALL command. - * After this reconfig step has issued the DRAIN_ALL and - * released the local set lock, metaset/metadb will fail - * when attempting to contact the rpc.mdcommd and will - * terminate without making any configuration changes. - * The DRAIN ALL command will keep all other meta* commands - * from running during the reconfig cycle (these commands - * will wait until the rpc.mdcommd is resumed) since the - * reconfig cycle may be changing the diskset configuration. - */ - - /* expect the nodelist to follow the step name */ - if (argc < 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Return step: %s"), - meta_print_hrtime(0)); - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - /* - * Suspend any mirror resyncs that are in progress. This - * stops unnecessary timeouts. - */ - meta_mirror_resync_block_all(); - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* - * All metaset and metadb commands on this node have now - * terminated gracefully. Now, issue a drain all to - * the rpc.mdcommd. Any meta command issued after the - * drain all will either spin sending the command to the - * master until after the reconfig cycle has finished OR - * will terminate gracefully (metaset/metadb). - */ - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext("Unable to " - "get set %d information"), setno); - md_exit(sp, 1); - } - } - - /* only check multi-node disksets */ - if (!meta_is_mn_set(sp, ep)) { - mdclrerror(ep); - continue; - } - - meta_mc_log(MC_LOG3, gettext("Return - block parse " - "messages for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* - * Mddb parse messages are sent amongst the nodes - * in a diskset whenever the locator block or - * locator names structure has been changed. - * A locator block change could occur as a result - * of a disk failure during the reconfig cycle, - * so block the mddb parse messages while the - * rpc.commd is suspended during the reconfig cycle. - */ - if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) { - (void) memset(&mbp, 0, sizeof (mbp)); - mbp.c_setno = setno; - mbp.c_blk_flags = MDDB_BLOCK_PARSE; - if (metaioctl(MD_MN_MDDB_BLOCK, &mbp, - &mbp.c_mde, NULL)) { - (void) mdstealerror(ep, &mbp.c_mde); - mde_perror(ep, gettext("Could not " - "block set %s"), sp->setname); - md_exit(sp, 1); - } - } - - /* suspend commd and spin waiting for drain */ - while ((ret_val = mdmn_suspend(setno, - MD_COMM_ALL_CLASSES, commd_timeout)) == - MDE_DS_COMMDCTL_SUSPEND_NYD) { - (void) sleep(1); - } - - if (ret_val) { - md_eprintf(gettext("Could not suspend " - "rpc.mdcommd for set %s\n"), sp->setname); - md_exit(sp, 1); - } - } - /* - * Resume all I/Os for this node for all MN sets in - * case master node had suspended I/Os but panic'd - * before resuming I/Os. In case of failure, exit - * with a 1 since unable to resume I/Os on this node. - */ - if (clnt_mn_susp_res_io(mynode(), 0, MN_RES_IO, ep)) { - mde_perror(ep, gettext( - "Unable to resume I/O on node %s for all sets"), - mynode()); - md_exit(sp, 1); - } - - - /* - * Can now unlock local set lock. New metaset/metadb - * commands are now held off using drain all. - */ - (void) meta_unlock(local_sp, ep); - - meta_mc_log(MC_LOG2, gettext("Return step completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - case MC_STEP1: - /* - * Step 1 - * - * - Populate nodelist file if we are on clustering - * and pick a master node for each MN diskset. - */ - - /* expect the nodelist to follow the step name */ - if (argc < 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Step1: %s"), - meta_print_hrtime(0)); - - /* Always write nodelist file even if no local set exists */ - if (clust == SDSSC_OKAY) { - /* skip to the nodelist args */ - if (meta_write_nodelist(argc, argv, ep) != 0) { - mde_perror(ep, gettext( - "Could not populate nodelist file")); - md_exit(sp, 1); - } - } - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - /* - * At this point, all meta* commands are blocked across - * all disksets since the master rpc.mdcommd has drained or - * the master node has died. - * If a metaset or metadb command had been in progress - * at the start of the reconfig cycle, this command has - * either completed or it has been terminated due to - * the death of the master node. - * - * This means that that it is now ok to remove any - * outstanding clnt_locks associated with multinode - * disksets on this node due to a node panic during - * a metaset operation. This allows the routines that - * choose the master to use rpc.metad to determine the - * master of the diskset. - */ - if (clnt_clr_mnsetlock(mynode(), ep) != 0) { - meta_mc_log(MC_LOG2, gettext("Step1 aborted:" - "clear locks failed %s"), - meta_print_hrtime(gethrtime() - start_time)); - md_exit(local_sp, 1); - } - - /* - * Call reconfig_choose_master to choose a master for - * each MN diskset, update the nodelist for each diskset - * given the member information and send a reinit message - * to rpc.mdcommd to reload the nodelist. - */ - rval = meta_reconfig_choose_master(commd_timeout, ep); - if (rval == 205) { - /* - * NOTE: Should issue call to reboot remote host that - * is causing the RPC failure. Clustering to - * provide interface in the future. This should - * stop a never-ending set of 205 reconfig cycles. - * Remote host causing failure is stored in - * ep->host if ep is an RPC error. - * if (mdanyrpcerror(ep)) - * reboot (ep->host); - */ - meta_mc_log(MC_LOG2, gettext("Step1 aborted:" - "choose master failure of 205 %s"), - meta_print_hrtime(gethrtime() - start_time)); - md_exit(local_sp, 205); - } else if (rval != 0) { - meta_mc_log(MC_LOG2, gettext("Step1 failure: " - "choose master failure %s"), - meta_print_hrtime(gethrtime() - start_time)); - md_exit(local_sp, 1); - } - - meta_mc_log(MC_LOG2, gettext("Step1 completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - md_exit(local_sp, rval); - break; - - case MC_STEP2: - /* - * Step 2 - * - * In Step 2, each node walks the list of disksets. If a - * node is a master of a MN diskset, it synchronizes - * the local set USER records for that diskset. - * - * If disks exist in the diskset and there is a joined - * (owner) node in the diskset, the master will also: - * - synchronize the diskset mddbs to the master - * - play the change log - * - * The master node will now attempt to join any unjoined - * nodes that are currently members in the membership list. - */ - - /* expect the nodelist to follow the step name */ - if (argc < 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Step2: %s"), - meta_print_hrtime(0)); - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else if (mdanyrpcerror(ep)) { - /* Fail on RPC failure to self */ - mde_perror(ep, gettext( - "Unable to get information for " - "set number %d"), setno); - md_exit(local_sp, 1); - } else { - mde_perror(ep, gettext( - "Unable to get information for " - "set number %d"), setno); - mdclrerror(ep); - continue; - } - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - if (mdanyrpcerror(ep)) { - /* Fail on RPC failure to self */ - mde_perror(ep, gettext( - "Unable to get information for " - "set number %d"), setno); - md_exit(local_sp, 1); - } - mde_perror(ep, gettext("Unable to get set " - "%s desc information"), sp->setname); - mdclrerror(ep); - continue; - } - - /* Only check MN disksets */ - if (!(MD_MNSET_DESC(sd))) { - continue; - } - - /* All actions in step 2 are driven by master */ - if (!(sd->sd_mn_am_i_master)) { - continue; - } - - meta_mc_log(MC_LOG3, gettext("Step2 - begin record " - "synchronization for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* - * Synchronize the USER records in the local mddbs - * for hosts that are members. The USER records - * contain set, drive and host information. - */ - rval = meta_mnsync_user_records(sp, ep); - if (rval != 0) { - mde_perror(ep, gettext( - "Synchronization of user records " - "in set %s failed\n"), sp->setname); - if (rval == 205) { - /* - * NOTE: Should issue call to reboot - * remote host that is causing the RPC - * failure. Clustering to provide - * interface in the future. This - * should stop a never-ending set of - * 205 reconfig cycles. - * Remote host causing failure is - * stored in ep->host if ep is an - * RPC error. - * if (mdanyrpcerror(ep)) - * reboot (ep->host); - */ - md_exit(local_sp, 205); - } else { - md_exit(local_sp, 1); - } - } - - /* Reget sd since sync_user_recs may have flushed it */ - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, gettext("Unable to get set " - "%s desc information"), sp->setname); - md_exit(local_sp, 1); - } - - dd = metaget_drivedesc(sp, - (MD_BASICNAME_OK | PRINT_FAST), ep); - if (! mdisok(ep)) { - mde_perror(ep, gettext("Unable to get set " - "%s drive information"), sp->setname); - md_exit(local_sp, 1); - } - - /* - * No drives in set, continue to next set. - */ - if (dd == NULL) { - /* Done with this set */ - continue; - } - - meta_mc_log(MC_LOG3, gettext("Step2 - local set user " - "records completed for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* - * Synchronize the diskset mddbs for hosts - * that are members. This may involve - * playing the changelog and writing out - * to the diskset mddbs. - */ - rval = meta_mnsync_diskset_mddbs(sp, ep); - if (rval != 0) { - mde_perror(ep, gettext( - "Synchronization of diskset mddbs " - "in set %s failed\n"), sp->setname); - meta_mc_log(MC_LOG3, gettext("Step2 - diskset " - "mddb synchronization failed for " - "set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - - start_time)); - if (rval == 205) { - /* - * NOTE: Should issue call to reboot - * remote host that is causing the RPC - * failure. Clustering to provide - * interface in the future. This - * should stop a never-ending set of - * 205 reconfig cycles. - * Remote host causing failure is - * stored in ep->host if ep is an - * RPC error. - * if (mdanyrpcerror(ep)) - * reboot (ep->host); - */ - md_exit(local_sp, 205); - } else if (rval == 1) { - continue; - } else { - md_exit(local_sp, 1); - } - } - - meta_mc_log(MC_LOG3, gettext("Step2 - diskset mddb " - "synchronization completed for set %s: %s"), - sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* Join the starting nodes to the diskset */ - rval = meta_mnjoin_all(sp, ep); - if (rval != 0) { - mde_perror(ep, gettext( - "Join of non-owner (starting) nodes " - "in set %s failed\n"), sp->setname); - meta_mc_log(MC_LOG3, gettext("Step2 - non owner" - "nodes joined for set %s: %s"), - sp->setname, - meta_print_hrtime(gethrtime() - - start_time)); - if (rval == 205) { - /* - * NOTE: Should issue call to reboot - * remote host that is causing the RPC - * failure. Clustering to provide - * interface in the future. This - * should stop a never-ending set of - * 205 reconfig cycles. - * Remote host causing failure is - * stored in ep->host if ep is an - * RPC error. - * if (mdanyrpcerror(ep)) - * reboot (ep->host); - */ - md_exit(local_sp, 205); - } else { - md_exit(local_sp, 1); - } - } - - meta_mc_log(MC_LOG3, gettext("Step2 - non owner nodes " - "joined for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - } - - meta_mc_log(MC_LOG2, gettext("Step2 completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - case MC_STEP3: - /* - * Step 3 - * - * For all multinode sets do, - * - Reinitialise rpc.mdcommd - * - Reset mirror owners to null if the current owner is - * no longer in the membership list - */ - - /* expect the nodelist to follow the step name */ - if (argc < 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Step3: %s"), - meta_print_hrtime(0)); - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - /* - * walk through all sets on this node which could include: - * - MN disksets - * - traditional disksets - * - non-existent disksets - * start mirror resync for all MN sets - */ - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext("Unable to " - "get set %d information"), setno); - md_exit(local_sp, 1); - } - } - - /* only check multi-node disksets */ - if (!meta_is_mn_set(sp, ep)) { - mdclrerror(ep); - continue; - } - - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* If this node isn't joined to set, do nothing */ - if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES) { - if (!mdisok(ep)) { - mde_perror(ep, gettext("Could " - "not get set %s ownership"), - sp->setname); - md_exit(sp, 1); - } - mdclrerror(ep); - (void) meta_unlock(sp, ep); - continue; - } - - meta_mc_log(MC_LOG3, gettext("Step3 - begin " - "re-initialising rpc.mdcommd and resetting mirror " - "owners for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - /* reinitialzse rpc.mdcommd with new nodelist */ - if (mdmn_reinit_set(setno, commd_timeout)) { - md_eprintf(gettext( - "Could not re-initialise rpc.mdcommd for " - "set %s\n"), sp->setname); - md_exit(sp, 1); - } - - (void) memset(&cfg, 0, sizeof (cfg)); - cfg.c_id = 0; - cfg.c_setno = sp->setno; - if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, - NULL) != 0) { - (void) mdstealerror(ep, &cfg.c_mde); - mde_perror(ep, gettext("Could " - "not get set %s information"), - sp->setname); - md_exit(sp, 1); - } - - /* Don't do anything else if set is stale */ - if (cfg.c_flags & MDDB_C_STALE) { - (void) meta_unlock(sp, ep); - mdclrerror(ep); - continue; - } - - /* reset mirror owners */ - if (reset_state(RESET_OWNER, sp, MD_MIRROR, ep) == -1) { - md_exit(sp, 1); - } - - (void) meta_unlock(sp, ep); - - meta_mc_log(MC_LOG3, gettext("Step3 - rpc.mdcommd " - "re-initialised and mirror owners reset for " - "set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - } - - meta_mc_log(MC_LOG2, gettext("Step3 completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - case MC_STEP4: - /* - * Step 4 - * - * For all multinode sets do: - * - Resume the rpc.mdcommd messages. Must resume all - * sets before issuing I/O to any set since an error - * encountered in a commd suspended set could be - * blocked waiting for commd in another set to resume. - * (This happens since the daemon queues service - * all sets). An open of a soft partition causes - * a read of the watermarks during the open. - * - If set is non-writable (not an owner or STALE), then - * continue to next set. - * - * For all multinode sets do, - * - Reset ABR states for all mirrors, ie clear ABR if not - * open on any node. - * - Reset ABR states for all soft partitions, ie clear ABR if - * not open on any node. - * - For all slave nodes that have entered through the start - * step, update the ABR state to that of the master and - * get the submirror state from the master - * - meta_lock set - * - Resync all mirrors - * - unlock meta_lock for this set. - * - Choose a new owner for any orphaned resyncs - * - * There is one potential issue here. when concurrently - * resetting and updating the ABR state. If the master has ABR - * set, but should no longer have because the only node that - * had the metadevice open and had ABR set has paniced, the - * master will send a message to all nodes to clear the ABR - * state. Meanwhile any node that has come through the - * start step will get tstate from the master and will update - * ABR if it was set in tstate. So, we appear to have a problem - * if the following sequence occurs:- - * - The slave gets tstate with ABR set - * - The master sends a message to clear ABR - * - The slave updates ABR with the value it got from tstate. - * We now have the master with ABR clear and the slave with ABR - * set. Fortunately, having set ABR, the slave will close the - * metadevice after setting ABR and as there are no nodes with - * the device open, the close will send a message to clear ABR - * on all nodes. So, the nodes will all have ABR unset. - */ - - /* expect the nodelist to follow the step name */ - if (argc < 1) - usage(sp, 1); - - meta_mc_log(MC_LOG2, gettext("Starting Step4: %s"), - meta_print_hrtime(0)); - - /* - * Does local set exist? If not, exit with 0 - * since there's no reason to have this node panic if - * the local set cannot be started. - */ - if ((local_sp = load_local_set(ep)) == NULL) { - md_exit(local_sp, 0); - } - - /* - * walk through all sets on this node which could include: - * - MN disksets - * - traditional disksets - * - non-existent disksets - * start mirror resync for all MN sets - */ - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* Clear set_info structure */ - for (setno = 1; setno < max_sets; setno++) { - set_info[setno] = 0; - } - - /* start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext("Unable to " - "get set %d information"), setno); - md_exit(local_sp, 1); - } - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, gettext("Unable to get set " - "%s desc information"), sp->setname); - mdclrerror(ep); - continue; - } - - /* only check multi-node disksets */ - if (!meta_is_mn_set(sp, ep)) { - mdclrerror(ep); - continue; - } - - set_info[setno] |= SET_INFO_MN; - - /* - * If not an owner (all mddbs failed) or stale - * (< 50% mddbs operational), then set is - * non-writable so just resume commd and - * unblock mddb messages. - */ - mdclrerror(ep); - if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES) { - set_info[setno] |= SET_INFO_NO_WR; - } - if (!mdisok(ep)) { - mde_perror(ep, gettext("Could " - "not get set %s ownership"), - sp->setname); - md_exit(local_sp, 1); - } - /* Set is owned - is it stale? */ - if (!set_info[setno] & SET_INFO_NO_WR) { - (void) memset(&cfg, 0, sizeof (cfg)); - cfg.c_id = 0; - cfg.c_setno = sp->setno; - if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, - NULL) != 0) { - (void) mdstealerror(ep, &cfg.c_mde); - mde_perror(ep, gettext("Could " - "not get set %s information"), - sp->setname); - md_exit(local_sp, 1); - } - if (cfg.c_flags & MDDB_C_STALE) { - set_info[setno] |= SET_INFO_NO_WR; - } - } - - /* resume rpc.mdcommd */ - if (mdmn_resume(setno, MD_COMM_ALL_CLASSES, 0, - commd_timeout)) { - md_eprintf(gettext("Unable to resume " - "rpc.mdcommd for set %s\n"), sp->setname); - md_exit(local_sp, 1); - } - - /* Unblock mddb parse messages */ - if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) { - (void) memset(&mbp, 0, sizeof (mbp)); - mbp.c_setno = setno; - mbp.c_blk_flags = MDDB_UNBLOCK_PARSE; - if (metaioctl(MD_MN_MDDB_BLOCK, &mbp, - &mbp.c_mde, NULL)) { - (void) mdstealerror(ep, &mbp.c_mde); - mde_perror(ep, gettext("Could not " - "unblock set %s"), sp->setname); - md_exit(local_sp, 1); - } - } - meta_mc_log(MC_LOG3, gettext("Step4 - rpc.mdcommd " - "resumed and messages unblocked for set %s: %s"), - sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - } - - for (setno = 1; setno < max_sets; setno++) { - int start_step; - - /* Skip traditional disksets. */ - if ((set_info[setno] & SET_INFO_MN) == 0) - continue; - - /* - * If already determined that this set is - * a non-writable set, then just continue - * to next set since there's nothing else - * to do for a non-writable set. - */ - if (set_info[setno] & SET_INFO_NO_WR) - continue; - - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - mde_perror(ep, gettext("Unable to " - "get set %d information"), setno); - md_exit(local_sp, 1); - } - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, gettext("Unable to get set " - "%s desc information"), sp->setname); - mdclrerror(ep); - continue; - } - - /* See if this node came through the start step */ - (void) memset(&sf, 0, sizeof (sf)); - sf.sf_setno = sp->setno; - sf.sf_flags = MDDB_NM_GET; - /* Use magic to help protect ioctl against attack. */ - sf.sf_magic = MDDB_SETFLAGS_MAGIC; - if (metaioctl(MD_MN_GET_SETFLAGS, &sf, - &sf.sf_mde, NULL)) { - (void) mdstealerror(ep, &sf.sf_mde); - mde_perror(ep, gettext("Could not get " - "start_step flag for set %s"), sp->setname); - md_exit(local_sp, 1); - } - start_step = - (sf.sf_setflags & MD_SET_MN_START_RC)? 1: 0; - - /* - * We can now reset the start_step flag for the set - * if it was already set. - */ - if (start_step) { - (void) memset(&sf, 0, sizeof (sf)); - sf.sf_setno = sp->setno; - sf.sf_setflags = MD_SET_MN_START_RC; - sf.sf_flags = MDDB_NM_RESET; - /* - * Use magic to help protect ioctl - * against attack. - */ - sf.sf_magic = MDDB_SETFLAGS_MAGIC; - if (metaioctl(MD_MN_SET_SETFLAGS, &sf, - &sf.sf_mde, NULL)) { - (void) mdstealerror(ep, &sf.sf_mde); - mde_perror(ep, - gettext("Could not reset " - "start_step flag for set %s"), - sp->setname); - } - } - - meta_mc_log(MC_LOG3, gettext("Step4 - begin setting " - "ABR state and restarting io's for " - "set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - - - /* - * If we are not the master and we have come through - * the start step, we must update the ABR states - * for mirrors and soft partitions. Also the submirror - * states need to be synchronised so that we see the - * same status as other previously joined members. - * This _must_ be done before starting the resync. - */ - if (!(sd->sd_mn_am_i_master) && start_step) { - if (reset_state(GET_MIRROR_STATE, sp, MD_MIRROR, - ep) == -1) { - md_exit(local_sp, 1); - } - if (reset_state(UPDATE_ABR, sp, MD_SP, - ep) == -1) { - md_exit(local_sp, 1); - } - /* - * Mark the fact that we've got the mirror - * state. This allows the resync thread to - * determine if _it_ needs to issue this. This - * can happen if a node is added to a set after - * a reconfig cycle has completed. - */ - (void) memset(&sf, 0, sizeof (sf)); - sf.sf_setno = sp->setno; - sf.sf_setflags = MD_SET_MN_MIR_STATE_RC; - sf.sf_flags = MDDB_NM_SET; - /* - * Use magic to help protect ioctl - * against attack. - */ - sf.sf_magic = MDDB_SETFLAGS_MAGIC; - if (metaioctl(MD_MN_SET_SETFLAGS, &sf, - &sf.sf_mde, NULL)) { - (void) mdstealerror(ep, &sf.sf_mde); - mde_perror(ep, - gettext("Could not set " - "submirror state flag for set %s"), - sp->setname); - } - } - - /* - * All remaining actions are only performed by the - * master - */ - if (!(sd->sd_mn_am_i_master)) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - meta_mirror_resync_unblock(sp); - (void) meta_unlock(sp, ep); - continue; - } - - /* - * If the master came through the start step, this - * implies that all of the nodes must have done the - * same and hence there can be no applications - * running. Hence no need to reset ABR - */ - if (!start_step) { - /* Reset ABR state for mirrors */ - if (reset_state(RESET_ABR, sp, MD_MIRROR, - ep) == -1) { - md_exit(local_sp, 1); - } - /* ...and now the same for soft partitions */ - if (reset_state(RESET_ABR, sp, MD_SP, - ep) == -1) { - md_exit(local_sp, 1); - } - } - - /* - * choose owners for orphaned resyncs and reset - * non-orphaned resyncs so that an owner node that - * reboots will restart the resync if needed. - */ - if (reset_state(CHOOSE_OWNER, sp, MD_MIRROR, ep) == -1) - md_exit(local_sp, 1); - - /* - * Must unlock set lock before meta_mirror_resync_all - * sends a message to run the metasync command - * which also grabs the meta_lock. - */ - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - meta_mirror_resync_unblock(sp); - (void) meta_unlock(sp, ep); - - /* resync all mirrors in set */ - if (meta_mirror_resync_all(sp, 0, ep) != 0) { - mde_perror(ep, gettext("Mirror resyncs " - "failed for set %s"), sp->setname); - md_exit(local_sp, 1); - } - - meta_mc_log(MC_LOG3, gettext("Step4 - io's restarted " - "for set %s: %s"), sp->setname, - meta_print_hrtime(gethrtime() - start_time)); - } - - meta_mc_log(MC_LOG2, gettext("Step4 completed: %s"), - meta_print_hrtime(gethrtime() - start_time)); - - break; - - default: - usage(sp, 1); - break; - } - - md_exit(sp, 0); - /* NOTREACHED */ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metadb.c b/usr/src/cmd/lvm/util/metadb.c deleted file mode 100644 index 24dc37f0a549..000000000000 --- a/usr/src/cmd/lvm/util/metadb.c +++ /dev/null @@ -1,819 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Metadevice database utility. - */ - -#include -#define MDDB -#include -#include - -enum mddb_cmd {none, attach, detach, patch, infolong, infoshort}; - -extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); - -static void -usage( - mdsetname_t *sp, - char *string -) -{ - if ((string != NULL) && (*string != '\0')) - md_eprintf("%s\n", string); - - (void) fprintf(stderr, gettext( -"usage: %s [-s setname] -a [options] mddbnnn\n" -" %s [-s setname] -a [options] device ...\n" -" %s [-s setname] -d [options] mddbnnn\n" -" %s [-s setname] -d [options] device ...\n" -" %s [-s setname] -i \n" -" %s -p [options] [ mddb.cf-file ]\n" -"options:\n" -"-c count number of replicas (for use with -a only)\n" -"-f force adding or deleting of replicas\n" -"-k filename alternate /etc/system file\n" -"-l length specify size of replica (for use with -a only)\n"), - myname, myname, myname, myname, myname, myname); - - md_exit(sp, (string == NULL) ? 0 : 1); -} - -static mdname_t * -make_dbname( - mdsetname_t *sp, - mdnamelist_t **nlp, - char *name, - md_error_t *ep -) -{ - mdname_t *np; - - if ((np = metaname(&sp, name, LOGICAL_DEVICE, ep)) == NULL) - return (NULL); - - return (metanamelist_append(nlp, np)); -} - -static mdnamelist_t * -get_dbnames_fromfile( - mdsetname_t *sp, - mdnamelist_t **nlp, - char *tabname, - int *dbsize, - int *dbcnt, - int *default_size, - md_error_t *ep -) -{ - md_tab_t *tabp = NULL; - md_tab_line_t *linep = NULL; - int argc; - char **argv; - char *context; - int save = optind; - int c; - - /* look in md.tab */ - if ((tabp = meta_tab_parse(NULL, ep)) == NULL) { - if (! mdissyserror(ep, ENOENT)) - mde_perror(ep, ""); - mdclrerror(ep); - return (NULL); - } - - if ((linep = meta_tab_find(sp, tabp, tabname, TAB_MDDB)) == NULL) { - (void) mdsyserror(ep, ENOENT, tabname); - goto out; - } - argc = linep->argc; - argv = linep->argv; - context = linep->context; - - /* parse up entry */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "c:l:")) != -1) { - switch (c) { - case 'c': - if (sscanf(optarg, "%d", dbcnt) != 1) { - md_eprintf("%s: %s\n", - context, gettext("bad format")); - usage(sp, ""); - } - break; - - case 'l': - if (sscanf(optarg, "%d", dbsize) != 1) { - md_eprintf("%s: %s\n", - context, gettext("bad format")); - usage(sp, ""); - } - *default_size = FALSE; - break; - - default: - usage(sp, ""); - } - } - argc -= optind; - argv += optind; - for (; (argc > 0); --argc, ++argv) { - char *token = argv[0]; - - if (make_dbname(sp, nlp, token, ep) == NULL) { - metafreenamelist(*nlp); - *nlp = NULL; - goto out; - } - } - - /* cleanup, return list */ -out: - if (tabp != NULL) - meta_tab_free(tabp); - optind = save; - return (*nlp); -} - -/* - * built list of all devices which are to be detached - */ -static mdnamelist_t * -build_a_namelist( - mdsetname_t *sp, - int argc, - char **argv, - md_error_t *ep -) -{ - int i; - int dbsize, dbcnt, default_size; - mdnamelist_t *dbnlp = NULL; - - for (i = 0; i < argc; i++) { - if (strncmp(argv[i], "mddb", 4) == 0) { - if (get_dbnames_fromfile(sp, &dbnlp, argv[i], - &dbsize, &dbcnt, &default_size, ep) == NULL) { - /* don't freelist here - already been done */ - return (NULL); - } - continue; - } - if (make_dbname(sp, &dbnlp, argv[i], ep) == NULL) { - metafreenamelist(dbnlp); - return (NULL); - } - } - - return (dbnlp); -} - - -/* - * built the next list of devices which are to be attached - * that have the same size and count of replicas. - */ -static mdnamelist_t * -build_next_namelist( - mdsetname_t *sp, - int argc, - char **argv, - int *arg_index, - int *dbsize, - int *dbcnt, - int *default_size, - md_error_t *ep -) -{ - int i; - mdnamelist_t *dbnlp = NULL; - - for (i = *arg_index; i < argc; i++) { - if (strncmp(argv[i], "mddb", 4) == 0) { - /* - * If we have stuff in the namelist - * return it before processing the mddb entry. - */ - if (dbnlp) { - *arg_index = i; - return (dbnlp); - } - if (get_dbnames_fromfile(sp, &dbnlp, argv[i], - dbsize, dbcnt, default_size, ep) == NULL) { - /* don't freelist here - already been done */ - return (NULL); - } - *arg_index = i + 1; - return (dbnlp); - } - if (make_dbname(sp, &dbnlp, argv[i], ep) == NULL) { - metafreenamelist(dbnlp); - return (NULL); - } - } - *arg_index = argc; - return (dbnlp); -} - - -static int -chngdb( - mdsetname_t *sp, - enum mddb_cmd cmd, - int argc, - char *argv[], - uint_t options, - md_error_t *ep -) -{ - int c; - int i; - md_error_t xep = mdnullerror; - mdnamelist_t *dbnlp = NULL; - int dbsize = MD_DBSIZE; - int maxblks = MDDB_MAXBLKS; - int minblks = MDDB_MINBLKS; - int dbcnt = 1; - mdforceopts_t force = MDFORCE_NONE; - int rval = 0; - char *sysfilename = NULL; - int default_size = TRUE; - md_set_desc *sd; - md_setkey_t *cl_sk; - md_mnnode_desc *nd; - int suspend1_flag = 0; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "ac:dfk:pl:s:")) != -1) { - switch (c) { - case 'a': - break; - case 'c': - if (sscanf(optarg, "%d", &dbcnt) != 1) { - md_eprintf("%s: %s\n", - optarg, gettext("bad format")); - usage(sp, ""); - } - break; - case 'd': - break; - case 'f': - force = MDFORCE_LOCAL; - break; - case 'k': - sysfilename = optarg; - break; - case 'l': - if (sscanf(optarg, "%d", &dbsize) != 1) { - md_eprintf("%s: %s\n", - optarg, gettext("bad format")); - usage(sp, ""); - } - default_size = FALSE; - break; - case 'p': - break; - case 's': - break; - default: - usage(sp, ""); - } - } - - /* - * If it is a multinode diskset, use appropriate metadb size. - */ - if (! metaislocalset(sp)) { - if ((sd = metaget_setdesc(sp, ep)) == NULL) - return (-1); - - if (MD_MNSET_DESC(sd)) { - maxblks = MDDB_MN_MAXBLKS; - minblks = MDDB_MN_MINBLKS; - if (default_size) - dbsize = MD_MN_DBSIZE; - } - } - - if (dbsize > maxblks) - usage(sp, gettext("size (-l) is too big")); - - - if (dbsize < minblks) - usage(sp, gettext("size (-l) is too small")); - - if (dbcnt < 1) - usage(sp, gettext( - "count (-c) must be 1 or more")); - - - argc -= optind; - argv += optind; - if (argc <= 0) { - usage(sp, gettext( - "no devices specified to attach or detach")); - } - - if (! metaislocalset(sp)) { - - if (MD_MNSET_DESC(sd)) { - md_error_t xep = mdnullerror; - sigset_t sigs; - - /* Make sure we are blocking all signals */ - if (procsigs(TRUE, &sigs, &xep) < 0) - mdclrerror(&xep); - - /* - * Lock out other metaset or metadb commands - * across the diskset. - */ - nd = sd->sd_nodelist; - while (nd) { - if ((force & MDFORCE_LOCAL) && - strcmp(nd->nd_nodename, mynode()) != 0) { - nd = nd->nd_next; - continue; - } - - if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { - nd = nd->nd_next; - continue; - } - - if (clnt_lock_set(nd->nd_nodename, sp, ep)) { - rval = -1; - goto done; - } - nd = nd->nd_next; - } - /* - * Lock out other meta* commands by suspending - * class 1 messages across the diskset. - */ - nd = sd->sd_nodelist; - while (nd) { - if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { - nd = nd->nd_next; - continue; - } - - if (clnt_mdcommdctl(nd->nd_nodename, - COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, - MD_MSCF_NO_FLAGS, ep)) { - rval = -1; - goto done; - } - suspend1_flag = 1; - nd = nd->nd_next; - } - } else { - /* Lock the set on current set members */ - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip empty slots */ - if (sd->sd_nodes[i][0] == '\0') - continue; - - if ((force & MDFORCE_LOCAL) && - strcmp(sd->sd_nodes[i], mynode()) != 0) - continue; - - if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { - rval = -1; - goto done; - } - } - } - - force |= MDFORCE_SET_LOCKED; - options |= MDCHK_SET_LOCKED; - } - - if (cmd == detach) { - if ((dbnlp = build_a_namelist(sp, argc, argv, ep)) == NULL) { - rval = -1; - goto done; - } - - rval = meta_db_detach(sp, dbnlp, force, sysfilename, ep); - - metafreenamelist(dbnlp); - } - - if (cmd == attach) { - daddr_t nblks = 0; - int arg_index = 0; - int saved_dbsize = dbsize; - int saved_dbcnt = dbcnt; - int saved_default_size = default_size; - - if (force & MDFORCE_LOCAL) - options |= MDCHK_SET_FORCE; - - if (default_size) - if ((nblks = meta_db_minreplica(sp, ep)) < 0) - mdclrerror(ep); - /* - * Loop through build a new namelist - * for each "mddb" entry or the devices list - * on the command line. This allows each "mddb" - * entry to have unique dbsize and dbcnt. - */ - while (arg_index < argc) { - - dbnlp = build_next_namelist(sp, argc, argv, - &arg_index, &dbsize, &dbcnt, &default_size, ep); - if (dbnlp == NULL) { - rval = -1; - goto done; - } - /* - * If using the default size, - * then let's adjust the default to the minimum - * size currently in use. - */ - if (default_size && (nblks > 0)) - dbsize = nblks; /* adjust replica size */ - - if (dbsize > maxblks) - usage(sp, gettext("size (-l) is too big")); - - rval = meta_db_attach(sp, dbnlp, options, NULL, dbcnt, - dbsize, sysfilename, ep); - if (rval) { - metafreenamelist(dbnlp); - break; - } - dbsize = saved_dbsize; - dbcnt = saved_dbcnt; - default_size = saved_default_size; - - metafreenamelist(dbnlp); - } - } - -done: - if (! metaislocalset(sp)) { - cl_sk = cl_get_setkey(sp->setno, sp->setname); - if (MD_MNSET_DESC(sd)) { - /* - * Unlock diskset by resuming - * class 1 messages across the diskset. - */ - if (suspend1_flag) { - nd = sd->sd_nodelist; - while (nd) { - if (!(nd->nd_flags & - MD_MN_NODE_ALIVE)) { - nd = nd->nd_next; - continue; - } - - if (clnt_mdcommdctl(nd->nd_nodename, - COMMDCTL_RESUME, sp, - MD_MSG_CLASS1, - MD_MSCF_NO_FLAGS, &xep)) { - mde_perror(&xep, ""); - mdclrerror(&xep); - } - nd = nd->nd_next; - } - } - nd = sd->sd_nodelist; - while (nd) { - if ((force & MDFORCE_LOCAL) && - strcmp(nd->nd_nodename, mynode()) != 0) { - nd = nd->nd_next; - continue; - } - if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { - nd = nd->nd_next; - continue; - } - - if (clnt_unlock_set(nd->nd_nodename, cl_sk, - &xep)) - mdclrerror(&xep); - nd = nd->nd_next; - } - } else { - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip empty slots */ - if (sd->sd_nodes[i][0] == '\0') - continue; - - if ((force & MDFORCE_LOCAL) && - strcmp(sd->sd_nodes[i], mynode()) != 0) - continue; - - if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, - &xep)) - mdclrerror(&xep); - } - } - cl_set_setkey(NULL); - } - - return (rval); -} - -static int -info( - mdsetname_t *sp, - enum mddb_cmd cmd, - int print_headers, - int print_footers, - md_error_t *ep -) -{ - md_replicalist_t *rlp = NULL; - md_replicalist_t *rl; - md_replica_t *r; - int i; - char *unk_str = NULL; - - /* get list of replicas, quit if none */ - if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) - return (-1); - else if (rlp == NULL) - return (0); - - if (print_headers) { - (void) printf("\t%5.5s\t\t%9.9s\t%11.11s\n", gettext("flags"), - gettext("first blk"), gettext("block count")); - } - - unk_str = gettext("unknown"); - for (rl = rlp; rl != NULL; rl = rl->rl_next) { - r = rl->rl_repp; - - for (i = 0; i < MDDB_FLAGS_LEN; i++) { - if (r->r_flags & (1 << i)) - (void) putchar(MDDB_FLAGS_STRING[i]); - else - (void) putchar(' '); - } - - if ((r->r_blkno == -1) && (r->r_nblk == -1)) { - (void) printf("\t%7.7s\t\t%7.7s\t", unk_str, unk_str); - } else if (r->r_nblk == -1) { - (void) printf("\t%ld\t\t%7.7s\t", r->r_blkno, unk_str); - } else { - (void) printf("\t%ld\t\t%ld\t", r->r_blkno, r->r_nblk); - } - - (void) printf("\t%s\n", r->r_namep->bname); - - } - - metafreereplicalist(rlp); - - if (cmd == infoshort) - return (0); - - if (!print_footers) - return (0); - - (void) printf(gettext( - " r - replica does not have device relocation information\n" - " o - replica active prior to last mddb configuration change\n" - " u - replica is up to date\n" - " l - locator for this replica was read successfully\n" - " c - replica's location was in %s\n" - " p - replica's location was patched in kernel\n" - " m - replica is master, this is replica selected as input\n" - " t - tagged data is associated with the replica\n" - " W - replica has device write errors\n" - " a - replica is active, commits are occurring to this replica\n" - " M - replica had problem with master blocks\n" - " D - replica had problem with data blocks\n" - " F - replica had format problems\n" - " S - replica is too small to hold current data base\n" - " R - replica had device read errors\n" - " B - tagged data associated with the replica is not valid\n"), - META_DBCONF); - return (0); -} - -int -main(int argc, char **argv) -{ - mdsetname_t *sp = NULL; - int c; - enum mddb_cmd cmd = none; - char *sname = MD_LOCAL_NAME; - char *cffilename = NULL; - char *sysfilename = NULL; - int forceflg = FALSE; - mdchkopts_t options = 0; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - md_set_desc *sd; - int multi_node = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - - /* parse args */ - optind = 1; - opterr = 1; - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "ac:dfhik:l:ps:?")) != -1) { - switch (c) { - case 'a': - cmd = attach; - break; - case 'c': - break; - case 'd': - cmd = detach; - break; - case 'f': - forceflg = TRUE; - break; - case 'h': - usage(sp, (char *)0); - break; - case 'i': - cmd = infolong; - break; - case 'k': - sysfilename = optarg; - break; - case 'l': - break; - case 'p': - cmd = patch; - break; - case 's': - sname = optarg; - break; - - case '?': - if (optopt == '?') - usage(sp, NULL); - /*FALLTHROUGH*/ - default: - usage(sp, gettext("unknown command")); - } - } - if (cmd == none) - cmd = infoshort; - - /* get set context */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* print status */ - if (cmd == infoshort || cmd == infolong) { - if (optind != argc) - usage(sp, gettext( - "too many arguments")); - - if (info(sp, cmd, 1, 1, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - md_exit(sp, 0); - } - - if (meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (! metaislocalset(sp)) { - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (MD_MNSET_DESC(sd)) { - multi_node = 1; - } - } - - /* - * Adjust lock for traditional and local diskset. - * - * A MN diskset does not use the set meta_lock but instead - * uses the clnt_lock of rpc.metad and the suspend/resume - * feature of the rpc.mdcommd. Can't use set meta_lock since - * class 1 messages are grabbing this lock and if this thread - * is holding the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if ((!multi_node) && (meta_lock(sp, TRUE, ep) != 0)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* snarf MDDB locations */ - if (cmd != patch) { - if (meta_setup_db_locations(ep) != 0) { - if (! mdismddberror(ep, MDE_DB_STALE)) { - if (forceflg == FALSE) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - options = MDCHK_ALLOW_NODBS; - } - mdclrerror(ep); - } - } - - /* patch MDDB locations */ - if (cmd == patch) { - if (optind < (argc - 1)) { - usage(sp, gettext( - "too many arguments to -p")); - } - - if (optind == (argc - 1)) - cffilename = argv[optind]; - - if (metaislocalset(sp)) { - if (meta_db_patch(sysfilename, cffilename, 1, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - } - - /* add/delete replicas */ - if (cmd == attach || cmd == detach) { - if (chngdb(sp, cmd, argc, argv, options, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metadetach.c b/usr/src/cmd/lvm/util/metadetach.c deleted file mode 100644 index ab9d74c9db3e..000000000000 --- a/usr/src/cmd/lvm/util/metadetach.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * detach submirrors - */ - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [-f] mirror submirror\n\ - %s [-s setname] [-f] trans\n"), - myname, myname); - md_exit(sp, eval); -} - -/* - * detach submirror from mirror - */ -static int -mirror_detach( - mdsetname_t **spp, - mdname_t *mirnp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - mdname_t *submirnp; - int c; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:f")) != -1) { - switch (c) { - case 's': - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - default: - usage(*spp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind; - argv += optind; - if (argc != 2) - usage(*spp, 1); - - /* get submirror */ - if ((submirnp = metaname(spp, argv[1], META_DEVICE, ep)) == NULL) - return (-1); - - /* detach submirror */ - if (meta_mirror_detach(*spp, mirnp, submirnp, options, ep) != 0) - return (-1); - - /* update md.cf */ - if (meta_update_md_cf(*spp, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * detach log from trans - */ -static int -trans_detach( - mdsetname_t *sp, - mdname_t *transnp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - int delayed; - int c; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:f")) != -1) { - switch (c) { - case 's': - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - default: - usage(sp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind; - argv += optind; - if (argc != 1) - usage(sp, 1); - - /* detach log */ - if (meta_trans_detach(sp, transnp, options, &delayed, ep) != 0) - return (-1); - - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * parse args and doit - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdcmdopts_t options = (MDCMD_PRINT); - mdname_t *np; - char *miscname; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* find set and metadevice first */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:f?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - break; - } - } - if ((argc - optind) <= 0) - usage(sp, 1); - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* Get metadevice name */ - if (((np = metaname(&sp, argv[optind], META_DEVICE, ep)) == NULL) || - (metachkmeta(np, ep) != 0)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - assert(sp != NULL); - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[optind], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[optind] which is the metadevice arg - * If this fails, the master must panic as the mddb may be - * inconsistent - */ - int result; - result = meta_mn_send_command(sp, argc, argv, MD_DISP_STDERR | - MD_PANIC_WHEN_INCONSISTENT, NO_CONTEXT_STRING, ep); - /* - * The error message has been already been displayed - * just exit - */ - md_exit(sp, result); - } - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if ((miscname = metagetmiscname(np, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* dispatch based on device type */ - if (strcmp(miscname, MD_MIRROR) == 0) { - if (mirror_detach(&sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_TRANS) == 0) { - if (trans_detach(sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else { - md_eprintf(gettext( - "%s: invalid metadevice type %s\n"), - np->cname, miscname); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metadevadm.c b/usr/src/cmd/lvm/util/metadevadm.c deleted file mode 100644 index 63aa33db40f5..000000000000 --- a/usr/src/cmd/lvm/util/metadevadm.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * print usage message - */ -static void -usage(char *myname) -{ - (void) fprintf(stderr, gettext( - "usage: %s -h\n" - " %s [-s setname] -r [-lnv]\n" - " %s [-s setname] -u cxtxdx [-lnv]\n"), - myname, myname, myname); -} - -int -main(int argc, char **argv) -{ - char c; - char *sname = MD_LOCAL_NAME; - mddevopts_t options = 0; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdsetname_t *sp = NULL; - mdsetname_t *local_sp = NULL; - char *argname; - int todo = 0; - int ret = 0; - int md_upgd_stat = 0; - int error; - md_set_desc *sd; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((sdssc_bind_library() == SDSSC_OKAY) && - (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE)) - exit(error); - - openlog("metadevadm", LOG_ODELAY, LOG_USER); - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - closelog(); - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "vlhnrs:u:")) != -1) { - switch (c) { - case 'v': - options |= DEV_VERBOSE; - break; - case 'n': - options |= DEV_NOACTION; - break; - case 'r': - options |= DEV_RELOAD; - todo = 1; - break; - case 's': - sname = optarg; - break; - case 'u': - todo = 1; - options |= DEV_UPDATE; - argname = optarg; - if (argname == NULL) { - usage("metadevadm"); - closelog(); - md_exit(sp, 0); - } - break; - case 'l': - options |= DEV_LOG; - break; - case 'h': - default: - usage("metadevadm"); - closelog(); - md_exit(sp, 0); - } - } - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - closelog(); - md_exit(sp, 1); - } - - if (!metaislocalset(sp)) { - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - closelog(); - md_exit(sp, 1); - } - if (MD_MNSET_DESC(sd)) { - (void) printf("%s\n", gettext("metadevadm cannot be " - "run on multi-owner disksets\n")); - closelog(); - md_exit(sp, 0); - } - } - - if ((options & DEV_VERBOSE) && (todo != 1)) { - usage("metadevadm"); - closelog(); - md_exit(sp, 0); - } - - if ((options & DEV_NOACTION) && (todo != 1)) { - usage("metadevadm"); - closelog(); - md_exit(sp, 0); - } - - if (todo == 0) { - usage("metadevadm"); - closelog(); - md_exit(sp, 0); - } - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - closelog(); - md_exit(local_sp, 1); - } - - /* lock the local set */ - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - closelog(); - md_exit(local_sp, 1); - } - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - closelog(); - md_exit(local_sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - /* - * If the set is not owned by this node then only update the - * local set's replica. - */ - options |= DEV_LOCAL_SET; - } - - /* - * check for upgrade. If upgrade in progress then just exit. - */ - if (metaioctl(MD_UPGRADE_STAT, &md_upgd_stat, ep, NULL) != 0) { - mde_perror(ep, ""); - closelog(); - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - if (md_upgd_stat == 0) { - ret = meta_fixdevid(sp, options, argname, ep); - if (ret == METADEVADM_ERR) { - /* - * If the call failed, for a DEV_RELOAD still need to - * update the .conf file to provide the latest devid - * information so exit later. - */ - if (options & DEV_UPDATE) { - closelog(); - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - } - } - - /* - * Sync replica list in kernel to replica list in conf files. - * This will update driver name and minor number in conf file - * if reload was run. Will update device id in conf file if - * update was run. - */ - meta_sync_db_locations(sp, ep); - closelog(); - (void) meta_unlock(sp, ep); - md_exit(local_sp, ret); - return (0); -} diff --git a/usr/src/cmd/lvm/util/metahs.c b/usr/src/cmd/lvm/util/metahs.c deleted file mode 100644 index 4db667d28772..000000000000 --- a/usr/src/cmd/lvm/util/metahs.c +++ /dev/null @@ -1,753 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * hotspare maintenance - */ - -#include -#include - -/* - * possible actions - */ -enum metahs_op { - NONE, - ADD_A_HS, - DELETE_A_HS, - ENABLE_A_HS, - REPLACE_A_HS, - STATUS_A_HSP -}; - -/* - * report status of a hotspare pool - */ -static int -status_hsp( - mdsetname_t *sp, - mdhspname_t *hspnp, - md_error_t *ep -) -{ - mdprtopts_t options = (PRINT_HEADER | PRINT_SUBDEVS | PRINT_DEVID); - mdnamelist_t *nlp = NULL; - - /* must have set */ - assert(sp != NULL); - assert(hspnp->hsp == MD_HSP_NONE || sp->setno == HSP_SET(hspnp->hsp)); - - /* print status */ - if (meta_hsp_print(sp, hspnp, &nlp, NULL, stdout, options, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] -a hot_spare_pool [component...]\n\ - %s [-s setname] -a \"all\" component...\n\ - %s [-s setname] -d hot_spare_pool [component...]\n\ - %s [-s setname] -d \"all\" component...\n\ - %s [-s setname] -e component...\n\ - %s [-s setname] -r hot_spare_pool component_old component_new\n\ - %s [-s setname] -r \"all\" component_old component_new\n\ - %s [-s setname] -i [hot_spare_pool...]\n"), - myname, myname, myname, myname, myname, myname, myname, myname); - md_exit(sp, eval); -} - -/* - * parse args and add hotspares - */ -static int -add_hotspares( - mdsetname_t **spp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - mdhspnamelist_t *hspnlp = NULL; - mdnamelist_t *nlp = NULL; - int cnt; - mdhspnamelist_t *p; - int rval = -1; - - /* get hotspare pool name(s) */ - if (argc < 1) - usage(*spp, 1); - if ((argc > 1) && meta_is_all(argv[0])) { - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - if ((cnt = meta_get_hsp_names(*spp, &hspnlp, 0, ep)) < 0) { - return (-1); - } else if (cnt == 0) { - return (mderror(ep, MDE_NO_HSPS, NULL)); - } - } else { /* create the hsp nmlist from the specified hsp name */ - if (!is_hspname(argv[0])) - return (mderror(ep, MDE_NAME_ILLEGAL, argv[0])); - - if ((cnt = metahspnamelist(spp, &hspnlp, 1, &argv[0], ep)) < 0) - return (-1); - } - assert(cnt > 0); - --argc, ++argv; - - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* get hotspares */ - if (metanamelist(spp, &nlp, argc, argv, - LOGICAL_DEVICE, ep) < 0) { - goto out; - } - - /* add hotspares */ - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if (meta_hs_add(*spp, hspnp, nlp, options, ep) != 0) - goto out; - } - rval = 0; - - /* cleanup, return success */ -out: - if (hspnlp != NULL) - metafreehspnamelist(hspnlp); - if (nlp != NULL) - metafreenamelist(nlp); - return (rval); -} - -/* - * parse args and delete hotspares - */ -static int -delete_hotspares( - mdsetname_t **spp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - mdhspnamelist_t *hspnlp = NULL; - mdnamelist_t *nlp = NULL; - int cnt; - mdhspnamelist_t *p; - int rval = -1; - - /* get hotspare pool name(s) */ - if (argc < 1) - usage(*spp, 1); - if ((argc > 1) && meta_is_all(argv[0])) { - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - if ((cnt = meta_get_hsp_names(*spp, &hspnlp, 0, ep)) < 0) { - return (-1); - } else if (cnt == 0) { - return (mderror(ep, MDE_NO_HSPS, NULL)); - } - } else if ((cnt = metahspnamelist(spp, &hspnlp, 1, &argv[0], - ep)) < 0) { - return (-1); - } - assert(cnt > 0); - --argc, ++argv; - - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* get hotspares */ - if (metanamelist(spp, &nlp, argc, argv, - LOGICAL_DEVICE, ep) < 0) { - goto out; - } - - /* delete hotspares */ - cnt = 0; - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if (meta_hs_delete(*spp, hspnp, nlp, options, ep) != 0) { - if (mdisdeverror(ep, MDE_INVAL_HS)) - mdclrerror(ep); - else - goto out; - } else { - ++cnt; - } - } - - /* make sure we got some */ - if ((nlp != NULL) && (cnt == 0)) { - (void) mddeverror(ep, MDE_INVAL_HS, nlp->namep->dev, - nlp->namep->cname); - goto out; - } - - /* success */ - rval = 0; - - /* cleanup, return success */ -out: - if (hspnlp != NULL) - metafreehspnamelist(hspnlp); - if (nlp != NULL) - metafreenamelist(nlp); - return (rval); -} - -/* - * parse args and enable hotspares - */ -static int -enable_hotspares( - mdsetname_t **spp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - mdnamelist_t *nlp = NULL; - int rval = -1; - - /* enable hotspares */ - if (argc < 1) - usage(*spp, 1); - - /* get list of hotspares */ - if (metanamelist(spp, &nlp, argc, argv, - LOGICAL_DEVICE, ep) < 0) - goto out; - assert(nlp != NULL); - - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* enable hotspares */ - rval = meta_hs_enable(*spp, nlp, options, ep); - - /* cleanup, return success */ -out: - metafreenamelist(nlp); - return (rval); -} - -/* - * parse args and replace hotspares - */ -static int -replace_hotspares( - mdsetname_t **spp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - mdhspnamelist_t *hspnlp = NULL; - int cnt; - mdname_t *oldnp; - mdname_t *newnp; - mdhspnamelist_t *p; - int rval = -1; - - /* get hotspare pool name(s) */ - if (argc != 3) - usage(*spp, 1); - if (meta_is_all(argv[0])) { - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - if ((cnt = meta_get_hsp_names(*spp, &hspnlp, 0, ep)) < 0) { - return (-1); - } else if (cnt == 0) { - return (mderror(ep, MDE_NO_HSPS, NULL)); - } - } else if ((cnt = metahspnamelist(spp, &hspnlp, 1, &argv[0], - ep)) < 0) { - return (-1); - } - assert(cnt > 0); - - assert(*spp != NULL); - - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) - return (-1); - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* get old component */ - if ((oldnp = metaname(spp, argv[1], LOGICAL_DEVICE, ep)) == NULL) - goto out; - - /* get new component */ - if ((newnp = metaname(spp, argv[2], LOGICAL_DEVICE, ep)) == NULL) - goto out; - - /* replace hotspares */ - cnt = 0; - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if (meta_hs_replace(*spp, hspnp, oldnp, newnp, options, ep) - != 0) { - if (mdisdeverror(ep, MDE_INVAL_HS)) - mdclrerror(ep); - else - goto out; - } else { - ++cnt; - } - } - - /* make sure we got some */ - if (cnt == 0) { - (void) mddeverror(ep, MDE_INVAL_HS, oldnp->dev, oldnp->cname); - goto out; - } - - /* success */ - rval = 0; - - /* cleanup, return success */ -out: - if (hspnlp != NULL) - metafreehspnamelist(hspnlp); - return (rval); -} - -/* - * print_hsp_devid will collect the information for each underlying - * physical device for all the hotspare pools and print out the - * device relocation information - * INPUT: - * mdsetname_t *sp set the hsp is in - * mdhspnamelist_t *hspnlp list of hsp - * FILE *fp where to print to - * md_error_t *ep errors - * RETURN: - * 0 SUCCESS - * -1 ERROR - */ -static int -print_hsp_devid( - mdsetname_t *sp, - mdhspnamelist_t *hspnlp, - FILE *fp, - md_error_t *ep -) -{ - mddevid_t *ldevidp = NULL; - int retval = 0; - mdhspnamelist_t *p; - mddevid_t *nextp; - - /* for all hotspare pools */ - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - uint_t hsi; - - /* for all hotspares within a pool */ - for (hsi = 0; - hsi < hspnp->unitp->hotspares.hotspares_len; hsi++) { - mdname_t *hsname; - - hsname = - hspnp->unitp->hotspares.hotspares_val[hsi].hsnamep; - - meta_create_non_dup_list(hsname, &ldevidp); - } - } - - retval = meta_print_devid(sp, fp, ldevidp, ep); - - /* cleanup */ - for (nextp = ldevidp; nextp != NULL; ldevidp = nextp) { - Free(ldevidp->ctdname); - nextp = ldevidp->next; - Free(ldevidp); - } - return (retval); -} - -/* - * parse args and status hotspares - */ -static int -status_hotspares( - mdsetname_t **spp, - int argc, - char *argv[], - md_error_t *ep -) -{ - mdhspnamelist_t *hspnlp = NULL; - int cnt; - mdhspnamelist_t *p; - int rval = -1; - - /* get hotspare pool name(s) */ - if (argc == 0) { - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - if ((cnt = meta_get_hsp_names(*spp, &hspnlp, 0, ep)) < 0) { - return (-1); - } else if (cnt == 0) { - return (mderror(ep, MDE_NO_HSPS, NULL)); - } - } else if ((cnt = metahspnamelist(spp, &hspnlp, argc, argv, ep)) < 0) { - return (-1); - } - assert(cnt > 0); - - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* status hotspare pools */ - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if (status_hsp(*spp, hspnp, ep) != 0) - goto out; - } - - if (print_hsp_devid(*spp, hspnlp, stdout, ep) == 0) { - rval = 0; - } - - /* cleanup, return success */ -out: - if (hspnlp != NULL) - metafreehspnamelist(hspnlp); - return (rval); -} - -/* - * parse args and doit - */ -int -main( - int argc, - char **argv -) -{ - char *sname = MD_LOCAL_NAME; - mdsetname_t *sp = NULL; - enum metahs_op which_op = NONE; - mdcmdopts_t options = (MDCMD_PRINT | MDCMD_DOIT); - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:aderin?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case 'a': - if (which_op != NONE) - usage(sp, 1); - which_op = ADD_A_HS; - break; - - case 'd': - if (which_op != NONE) - usage(sp, 1); - which_op = DELETE_A_HS; - break; - - case 'e': - if (which_op != NONE) - usage(sp, 1); - which_op = ENABLE_A_HS; - break; - - case 'r': - if (which_op != NONE) - usage(sp, 1); - which_op = REPLACE_A_HS; - break; - - case 'i': - if (which_op != NONE) - usage(sp, 1); - which_op = STATUS_A_HSP; - break; - - case 'n': - if (called_thru_rpc == TRUE) { - options &= ~MDCMD_DOIT; - } else { - usage(sp, 1); - } - break; - - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - - /* get set context */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* - * Send the command to all nodes if the -s argument refers to a MN - * set or the next argument refers to MN set hotspare name ( argc - * greater than optind if there is a next argument) - */ - if ((called_thru_rpc == FALSE) && - (meta_is_mn_set(sp, ep) || ((argc > optind) && - meta_is_mn_name(&sp, argv[optind], ep)))) { - int i; - int newargc; - int result; - char **newargv; - - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * First we send out a dryrun version of this command. - * If that returns success, we know it succeeded on all - * nodes and it is safe to do the real command now. - */ - newargv = calloc(argc+1, sizeof (char *)); - newargv[0] = "metahs"; - newargv[1] = "-n"; /* always do "-n" first */ - newargc = 2; - for (i = 1; i < argc; i++, newargc++) - newargv[newargc] = argv[i]; - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_DRYRUN, NO_CONTEXT_STRING, ep); - - /* If we found a problem don't do it for real */ - if (result != 0) { - md_exit(sp, result); - } - - /* - * Do it for real now. Remove "-n" from the arguments and - * MD_DRYRUN from the flags. If this fails the master must panic - * as the mddbs may be inconsistent. - */ - newargv[1] = ""; /* this was "-n" before */ - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_RETRY_BUSY | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - free(newargv); - - /* No further action required */ - md_exit(sp, result); - } - - argc -= optind; - argv += optind; - if (which_op == NONE) - usage(sp, 1); - - /* - * if a hot spare pool was specified by name then - * get the canonical form of the name and set up - * sp if the name was specified in the form 'set/hsp' - * unless 'all' is specified or the request is made to - * enable a hs which means that argv[0] will be a component - */ - if (argc > 0 && !meta_is_all(argv[0]) && which_op != ENABLE_A_HS) { - char *cname = NULL; - - cname = meta_name_getname(&sp, argv[0], HSP_DEVICE, ep); - if (cname == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - Free(cname); - } - - if (which_op == STATUS_A_HSP) { - if (status_hotspares(&sp, argc, argv, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - md_exit(sp, 0); - } - - if (meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - - /* dispatch */ - switch (which_op) { - - case ADD_A_HS: - if (add_hotspares(&sp, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - break; - - case DELETE_A_HS: - if (delete_hotspares(&sp, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - break; - - case ENABLE_A_HS: - if (enable_hotspares(&sp, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - break; - - case REPLACE_A_HS: - if (replace_hotspares(&sp, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - break; - - default: - assert(0); - break; - } - - /* update md.cf */ -out: - if (meta_update_md_cf(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaimport.c b/usr/src/cmd/lvm/util/metaimport.c deleted file mode 100644 index c6d5fdea66cc..000000000000 --- a/usr/src/cmd/lvm/util/metaimport.c +++ /dev/null @@ -1,877 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Utility to import SVM disksets into an active SVM configuration. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static md_im_drive_info_t *overlap_disks; - -static void -usage(mdsetname_t *sp, char *string) -{ - if ((string != NULL) && (*string != '\0')) - md_eprintf("%s\n", string); - - (void) fprintf(stderr, - "%s:\t%s -s setname [-n] [-f] [-v] [%s...]\n", - gettext("usage"), myname, gettext("disk")); - (void) fprintf(stderr, " %s -r [%s...]\n", - myname, gettext("disk")); - (void) fprintf(stderr, " %s -?\n", myname); - (void) fprintf(stderr, " %s -V\n", myname); - - md_exit(sp, (string == NULL) ? 0 : 1); -} - -static void -print_version(mdsetname_t *sp) -{ - struct utsname curname; - - if (uname(&curname) == -1) { - md_eprintf("%s\n", strerror(errno)); - md_exit(sp, 1); - } - - (void) fprintf(stderr, "%s %s\n", myname, curname.version); - - md_exit(sp, 0); -} - -/* - * Returns 0 if there is no overlap, 1 otherwise - */ -static int -set_disk_overlap(md_im_set_desc_t *misp) -{ - md_im_set_desc_t *next, *isp = misp; - md_im_drive_info_t *set_dr, *next_set_dr, **chain; - int is_overlap = 0; - md_im_drive_info_t *good_disk = NULL; - md_im_drive_info_t *d; - md_timeval32_t gooddisktime; - int disk_not_available = 0; - /* - * There are 2 ways we could get an "overlap" disk. - * One is if the ctd's are the same. The other is if - * the setcreatetimestamp on the disk doesn't agree with the - * "good" disk in the set. However, if we have a disk that is - * unavailable and the other instance of the ctd is available we - * really don't have a conflict. It's just that the unavailable ctd - * is it's "old" location and the available instance is a current - * location. - */ - for (; isp != NULL; isp = isp->mis_next) { - for (next = isp->mis_next; next != NULL; next = next->mis_next) { - for (set_dr = isp->mis_drives; set_dr != NULL; - set_dr = set_dr->mid_next) { - if (set_dr->mid_available == MD_IM_DISK_NOT_AVAILABLE) - disk_not_available = 1; - else - disk_not_available = 0; - for (next_set_dr = next->mis_drives; next_set_dr != NULL; - next_set_dr = next_set_dr->mid_next) { - if (disk_not_available && - (next_set_dr->mid_available - == MD_IM_DISK_AVAILABLE)) - continue; - else if (!disk_not_available && - (next_set_dr->mid_available == - MD_IM_DISK_NOT_AVAILABLE)) - continue; - if (strcmp(set_dr->mid_dnp->cname, - next_set_dr->mid_dnp->cname) == 0) { - /* - * Chain it, skip if - * already there - */ - if (overlap_disks == NULL) { - set_dr->overlap = NULL; - set_dr->overlapped_disk = 1; - next_set_dr->overlapped_disk = 1; - overlap_disks = set_dr; - } else { - for (chain = &overlap_disks; - *chain != NULL; - chain = &(*chain)->overlap) { - if (strcmp(set_dr->mid_dnp->cname, - (*chain)->mid_dnp->cname) == 0) - break; - } - - if (*chain == NULL) { - *chain = set_dr; - set_dr->overlap = NULL; - set_dr->overlapped_disk = 1; - next_set_dr->overlapped_disk = 1; - } - } - if (!is_overlap) - is_overlap = 1; - } - } - } - } - } - - for (isp = misp; isp != NULL; isp = isp->mis_next) { - good_disk = pick_good_disk(isp); - if (good_disk == NULL) { - /* didn't find a good disk */ - continue; - } - gooddisktime = good_disk->mid_setcreatetimestamp; - for (d = isp->mis_drives; d != NULL; d = d->mid_next) { - if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE) - continue; - /* - * If the disk doesn't have the same set creation - * time as the designated "good disk" we have a - * time conflict/overlap situation. Mark the disk - * as such. - */ - if ((gooddisktime.tv_usec != - d->mid_setcreatetimestamp.tv_usec) || - (gooddisktime.tv_sec != - d->mid_setcreatetimestamp.tv_sec)) { - d->overlapped_disk = 1; - if (overlap_disks == NULL) { - d->overlap = NULL; - d->overlapped_disk = 1; - overlap_disks = d; - } else { - for (chain = &overlap_disks; - *chain != NULL; - chain = &(*chain)->overlap) { - if (strcmp(d->mid_dnp->cname, - (*chain)->mid_dnp->cname) - == 0) { - break; - } - } - - if (*chain == NULL) { - *chain = d; - d->overlap = NULL; - d->overlapped_disk = 1; - } - } - if (!is_overlap) - is_overlap = 1; - } - } - } - return (is_overlap); -} - -static void -report_overlap_recommendation() -{ - mddb_mb_t *mbp; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - md_im_drive_info_t *d; - - (void) fprintf(stdout, "%s\n", gettext("Warning: The following disks " - "have been detected in more than one set.\n" - "Import recommendation based upon set creation time.\n" - "Proceed with the import with caution.")); - - /* - * Look at all overlapping disks. Determine which slice - * would have a replica on it. i.e. either slice 7 or 6. - * Then read the master block. If the disk doesn't have a - * metadb on it, the master block is a dummy master block. - * Both dummy or normal master block contain the timestamp - * which is what we are after. Use this timestamp to issue - * the appropriate recommendation. - */ - mbp = Malloc(DEV_BSIZE); - for (d = overlap_disks; d != NULL; d = d->overlap) { - mdname_t *rsp; - uint_t sliceno; - int fd = -1; - - /* - * If the disk isn't available (i.e. powered off or dead) - * we can't read the master block timestamp and thus - * cannot make a recommendation as to which set it belongs to. - */ - if (d->mid_available != MD_IM_DISK_AVAILABLE) { - (void) fprintf(stdout, " %s ", d->mid_dnp->cname); - (void) fprintf(stdout, - gettext(" - no recommendation can " - "be made because disk is unavailable\n")); - continue; - } - - if (meta_replicaslice(d->mid_dnp, &sliceno, ep) != 0) - continue; - - if (d->mid_dnp->vtoc.parts[sliceno].size == 0) - continue; - - if ((rsp = metaslicename(d->mid_dnp, sliceno, ep)) == NULL) - continue; - if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0) - continue; - if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) { - (void) close(fd); - mdclrerror(ep); - continue; - } - (void) close(fd); - (void) fprintf(stdout, " %s ", d->mid_dnp->cname); - (void) fprintf(stdout, "%s: %s\n", - gettext(" - must import with set " - "created at "), meta_print_time((md_timeval32_t *) - (&(mbp->mb_setcreatetime)))); - } - Free(mbp); -} - -/* - * is_first_disk is called to determine if the disk passed to it is - * eligible to be used as the "first disk time" in the set. It checks to - * see if the disk is available, on the skip list or not (thus already in - * an importable set) or being used by the system already. - * RETURN: - * 1 The time can be used as the first disk time - * 0 The time should not be used. - */ -static int -is_first_disk( -md_im_drive_info_t *d, -mddrivenamelist_t **skiph) -{ - mddrivenamelist_t *slp; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); - - /* - * If a disk is not available there is no - * set creation timestamp available. - */ - if (d->mid_available == MD_IM_DISK_AVAILABLE) { - /* - * We also need to make sure this disk isn't already on - * the skip list. - */ - for (slp = *skiph; slp != NULL; slp = slp->next) { - if (d->mid_dnp == slp->drivenamep) - return (0); - } - /* - * And we need to make sure the drive isn't - * currently being used for something else - * like a mounted file system or a current - * metadevice or in a set. - */ - if (meta_imp_drvused(sp, d->mid_dnp, ep)) { - return (0); - } - } else { - return (0); - } - return (1); -} - -/* - * Input a list of disks (dnlp), find the sets that are importable, create - * a list of these sets (mispp), and a list of the disks within each of these - * sets (midp). These lists (mispp and midp) will be used by metaimport. - */ -static int process_disks( - mddrivenamelist_t *dnlp, - mddrivenamelist_t **skipt, - md_im_set_desc_t **mispp, - int flags, - int *set_count, - int overlap, - md_error_t *ep -) -{ - mddrivenamelist_t *dp; - int rscount = 0; - int hasreplica; - md_im_set_desc_t *p; - md_im_drive_info_t *d; - mddrivenamelist_t **skiph = skipt; - - /* Scan qualified disks */ - for (dp = dnlp; dp != NULL; dp = dp->next) { - mddrivenamelist_t *slp; - - /* is the current drive on the skip list? */ - for (slp = *skiph; slp != NULL; slp = slp->next) { - if (dp->drivenamep == slp->drivenamep) - break; - } - /* drive on the skip list ? */ - if (slp != NULL) - continue; - - /* - * In addition to updating the misp list, either verbose or - * standard output will be generated. - * - */ - hasreplica = meta_get_and_report_set_info(dp, mispp, 0, - flags, set_count, overlap, overlap_disks, ep); - - if (hasreplica < 0) { - mde_perror(ep, ""); - mdclrerror(ep); - } else { - - rscount += hasreplica; - - /* Eliminate duplicate reporting */ - if (hasreplica > 0) { - md_timeval32_t firstdisktime; - - /* - * Go to the tail for the current set - */ - for (p = *mispp; p->mis_next != NULL; - p = p->mis_next) - ; - - /* - * Now look for the set creation timestamp. - * If a disk is not available there is no - * set creation timestamp available so look - * for the first available disk to grab this - * information from. We also need to make - * sure this disk isn't already on the skip - * list. If so go to the next available drive. - * And we need to make sure the drive isn't - * currently being used for something else - * like a mounted file system or a current - * metadevice or in a set. - */ - for (d = p->mis_drives; d != NULL; - d = d->mid_next) { - if (is_first_disk(d, skiph)) { - firstdisktime = - d->mid_setcreatetimestamp; - break; - } - } - for (d = p->mis_drives; d != NULL; - d = d->mid_next) { - /* - * if the mb_setcreatetime for a disk - * is not the same as the first disk - * in the set, don't put it on the - * skip list. This disk probably - * doesn't really belong in this set - * and we'll want to look at it again - * to figure out where it does belong. - * If the disk isn't available, there's - * really no point in looking at it - * again so put it on the skip list. - */ - if (d->mid_available == - MD_IM_DISK_AVAILABLE) { - if ((d->mid_setcreatetimestamp. - tv_sec != firstdisktime. - tv_sec) || - (d->mid_setcreatetimestamp. - tv_usec != - firstdisktime.tv_usec)) - continue; - } - skipt = - meta_drivenamelist_append_wrapper( - skipt, d->mid_dnp); - } - } - } - } - return (rscount); -} - -int -main(int argc, char *argv[]) -{ - char c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdsetname_t *sp = NULL; - char *setname_new = NULL; - int report_only = 0; - int version = 0; - bool_t dry_run = 0; - md_im_names_t cnames = { 0, NULL }; - int err_on_prune = 0; - mddrivenamelist_t *dnlp = NULL; - mddrivenamelist_t *dp; - mddrivenamelist_t *skiph = NULL; - int rscount = 0; - md_im_set_desc_t *pass1_misp = NULL; - md_im_set_desc_t *misp = NULL; - md_im_set_desc_t **pass1_mispp = &pass1_misp; - md_im_set_desc_t **mispp = &misp; - mhd_mhiargs_t mhiargs = defmhiargs; - int have_multiple_sets = 0; - int force = 0; - int overlap = 0; - uint_t imp_flags = 0; - int set_count = 0; - int no_quorum = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to output. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - /* - * Check to see if the libsds_sc.so is bound on the - * current system. If it is, it means the system is - * part of a cluster. - * - * The import operation is currently not supported - * in a SunCluster environment. - */ - if (sdssc_bind_library() != SDSSC_NOT_BOUND) { - (void) printf(gettext( - "%s: Import operation not supported under SunCluster\n"), - argv[0]); - exit(0); - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - optind = 1; - opterr = 1; - - while ((c = getopt(argc, argv, "frns:vV?")) != -1) { - switch (c) { - - case 'f': - force = 1; - break; - - case 'n': - dry_run = 1; - break; - - case 'r': - report_only = 1; - imp_flags |= META_IMP_REPORT; - break; - - case 's': - setname_new = optarg; - break; - - case 'v': - imp_flags |= META_IMP_VERBOSE; - break; - - case 'V': - version = 1; - break; - - case '?': - default: - usage(sp, NULL); - break; - } - } - - if (version == 1) - print_version(sp); - - /* Detect conflicting options */ - if ((dry_run != 0) && (report_only != 0)) - usage(sp, gettext("The -n and -r options conflict.")); - - if ((report_only != 0) && (setname_new != NULL)) - usage(sp, gettext("The -r and -s options conflict.")); - - if ((report_only == 0) && (setname_new == NULL)) - usage(sp, gettext("You must specify either -r or -s.")); - - /* Don't do any real work if we don't have root privilege */ - if (meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_setup_db_locations(ep) != 0) { - mde_perror(ep, ""); - if (mdismddberror(ep, MDE_DB_STALE)) - md_exit(sp, 66); - if (! mdiserror(ep, MDE_MDDB_CKSUM)) - md_exit(sp, 1); - } - - /* - * Read remaining arguments into drive name list, otherwise - * call routine to list all drives in system. - */ - if (argc > optind) { - int i; - - /* For user specified disks, they MUST not be in use */ - err_on_prune = 1; - - /* All remaining args should be disks */ - cnames.min_count = argc - optind; - cnames.min_names = Malloc(cnames.min_count * sizeof (char *)); - - for (i = 0; i < cnames.min_count; i++, optind++) { - mddrivename_t *dnp; - dnp = metadrivename(&sp, argv[optind], ep); - if (dnp == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } else { - cnames.min_names[i] = dnp->rname; - } - } - } else { - if (meta_list_disks(ep, &cnames) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* - * If the user specified disks on the command line, min_count will be - * greater than zero. If they didn't, it should be safe to assume that - * the system in question has at least one drive detected by the - * snapshot code, or we would have barfed earlier initializing the - * metadb. - */ - assert(cnames.min_count > 0); - - /* - * Prune the list: - * - get rid of drives in current svm configuration - * - get rid of mounted drives - * - get rid of swap drives - * - get rid of drives in other sets - * - * If drives were specified on the command line, it should be - * an error to find in-use disks in the list. (err_on_prune) - * - * On return from meta_prune_cnames call, dnlp - * will have candidate for replica scan. - */ - dnlp = meta_prune_cnames(ep, &cnames, err_on_prune); - - /* - * Doctor the drive string in the error structure to list all of the - * unused disks, rather than just one. The output will be done in the - * following !mdisok() block. - */ - if (mdisdserror(ep, MDE_DS_DRIVEINUSE)) { - md_ds_error_t *ip = - &ep->info.md_error_info_t_u.ds_error; - char *dlist; - int sizecnt = 0; - - /* add 1 for null terminator */ - sizecnt += strlen(ip->drive) + 1; - for (dp = dnlp->next; dp != NULL; dp = dp->next) { - sizecnt += 2; /* for the ", " */ - sizecnt += strlen(dp->drivenamep->cname); - } - - dlist = Malloc(sizecnt); - - (void) strlcpy(dlist, ip->drive, sizecnt); - - Free(ip->drive); - for (dp = dnlp->next; dp != NULL; dp = dp->next) { - (void) strlcat(dlist, ", ", sizecnt); - (void) strlcat(dlist, dp->drivenamep->cname, sizecnt); - } - - ip->drive = dlist; - } - - /* Don't continue if we're already hosed */ - if (!mdisok(ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* ...or if there's nothing to scan */ - if (dnlp == NULL) { - md_eprintf("%s\n", gettext("no unused disks detected")); - md_exit(sp, 0); - } - - /* - * META_IMP_PASS1 means gather the info, but don't report. - */ - (void) process_disks(dnlp, &skiph, pass1_mispp, - imp_flags | META_IMP_PASS1, &set_count, overlap, ep); - - overlap_disks = NULL; - overlap = set_disk_overlap(pass1_misp); - skiph = NULL; - - /* - * This time call without META_IMP_PASS1 set and we gather - * and report the information. - * We need to do this twice because of the overlap detection. - * The first pass generates a list of disks to detect overlap on. - * We then do a second pass using that overlap list to generate - * the report. - */ - rscount = process_disks(dnlp, &skiph, mispp, imp_flags, &set_count, - overlap, ep); - - /* - * Now have entire list of disks associated with diskset including - * disks listed in mddb locator blocks and namespace. Before importing - * diskset need to recheck that none of these disks is already in use. - * If a disk is found that is already in use, print error and exit. - */ - if (!report_only) { - md_im_set_desc_t *p; - md_im_drive_info_t *d; - mddrivename_t *dnp; - - if (sp == NULL) { - /* Get sp for local set */ - if ((sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } - } - - for (p = misp; p != NULL; p = p->mis_next) { - for (d = p->mis_drives; d != NULL; d = d->mid_next) { - dnp = d->mid_dnp; - if (d->mid_available == MD_IM_DISK_AVAILABLE) { - if (meta_imp_drvused(sp, dnp, ep)) { - (void) mddserror(ep, - MDE_DS_DRIVEINUSE, 0, NULL, - dnp->cname, NULL); - mde_perror(ep, ""); - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } - } else { - /* - * If drive is unavailable, then check - * that this drive hasn't already been - * imported as part of another partial - * diskset. Check by devid instead of - * cname since the unavailable drive - * would have the cname from its - * previous system and this may collide - * with a valid cname on this system. - * Fail if devid is found in another - * set or if the routine fails. - */ - mdsetname_t *tmp_sp = NULL; - - if ((meta_is_devid_in_anyset( - d->mid_devid, &tmp_sp, ep) == -1) || - (tmp_sp != NULL)) { - (void) mddserror(ep, - MDE_DS_DRIVEINUSE, 0, NULL, - dnp->cname, NULL); - mde_perror(ep, ""); - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } - } - } - } - } - - /* - * If there are no unconfigured sets, then our work here is done. - * Hopefully this is friendlier than just not printing anything at all. - */ - if (rscount == 0) { - /* - * If we've found partial disksets but no complete disksets, - * we don't want this to print. - */ - if (!misp) { - md_eprintf("%s\n", gettext("no unconfigured sets " - "detected")); - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } - md_exit(sp, 0); - } - - /* - * We'll need this info for both the report content and the import - * decision. By the time we're here, misp should NOT be NULL (or we - * would have exited in the rscount == 0 test above). - */ - assert(misp != NULL); - if (misp->mis_next != NULL) { - have_multiple_sets = 1; - } - /* - * Generate the appropriate (verbose or not) report for all sets - * detected. If we're planning on importing later, only include the - * "suggested import" command if multiple sets were detected. (That - * way, when we error out later, we have still provided useful - * information.) - */ - - /* - * Now we should have all the unconfigured sets detected - * check for the overlapping - */ - if (have_multiple_sets) { - /* Printing out how many candidate disksets we found. */ - if (imp_flags & META_IMP_REPORT) { - (void) printf("%s: %i\n\n", - gettext("Number of disksets eligible for import"), - set_count); - } - } - if (overlap) { - report_overlap_recommendation(); - } - - if (have_multiple_sets && !report_only) { - md_eprintf("%s\n\n", gettext("multiple unconfigured " - "sets detected.\nRerun the command with the " - "suggested options for the desired set.")); - } - - - /* - * If it's a report-only request, we're done. If it's an import - * request, make sure that we only have one entry in the set list. - */ - - if (report_only) { - meta_free_im_set_desc(misp); - md_exit(sp, 0); - } else if (have_multiple_sets) { - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } else if (overlap) { - md_im_drive_info_t *d; - /* - * The only way we can get here is if we're doing an import - * request on a set that contains at least one disk with - * a time conflict. We are prohibiting the importation of - * this type of set until the offending disk(s) are turned - * off to prevent data corruption. - */ - (void) printf(gettext("To import this set, ")); - for (d = pass1_misp->mis_drives; - d != NULL; - d = d->mid_next) { - if (d->overlapped_disk) - (void) printf("%s ", d->mid_dnp->cname); - } - (void) printf(gettext("must be removed from the system\n")); - meta_free_im_set_desc(misp); - md_exit(sp, 1); - } - - if (setname_new == NULL) { - usage(sp, gettext("You must specify a new set name.")); - } - - /* - * The user must specify the -f (force) flag if the following - * conditions exist: - * - partial diskset - * - stale diskset - */ - if (meta_replica_quorum(misp) != 0) - no_quorum = 1; - if (misp->mis_partial || no_quorum) { - if (!force) - usage(sp, gettext("You must specify the force flag")); - } - (void) meta_imp_set(misp, setname_new, force, dry_run, ep); - if (dry_run) { - meta_free_im_set_desc(misp); - md_exit(sp, 0); - } - - if (!mdisok(ep)) { - meta_free_im_set_desc(misp); - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if ((sp = metasetname(setname_new, ep)) == NULL) { - meta_free_im_set_desc(misp); - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_lock_nowait(sp, ep) != 0) { - meta_free_im_set_desc(misp); - mde_perror(ep, ""); - md_exit(sp, 10); /* special errcode */ - } - - if (meta_set_take(sp, &mhiargs, (misp->mis_partial | TAKE_IMP), - 0, &status)) { - meta_free_im_set_desc(misp); - mde_perror(&status, ""); - md_exit(sp, 1); - } - - meta_free_im_set_desc(misp); - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metainit.c b/usr/src/cmd/lvm/util/metainit.c deleted file mode 100644 index 63a4286494ca..000000000000 --- a/usr/src/cmd/lvm/util/metainit.c +++ /dev/null @@ -1,900 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * initialize metadevices - */ - -#include - -#include -#include -#include -#include "meta_set_prv.h" - -/* - * try to initialize devices - */ -#define DO_AGAIN 0 -#define DONT_DO 1 -#define IS_DONE 2 - -/* - * mn_send_command - * - * generate a command of the form "metainit -s setname [-n] [-f] ....." - * - * If -n option is *not* set, send the metainit command *with -n set* to - * all nodes first. Do this with MD_MSGF_STOP_ON_ERROR set. - * That means if it fails on one node, it'll return immediately, - * reporting the error. - * By doing so, we have a dryrun first that has to succeed on every node - * before we start the command for real. - * This saves us from backing out a metainit command that succeeded on - * some nodes but failed on one. - */ -static int -mn_send_command( - mdsetname_t **spp, - int argc, - char **argv, - mdcmdopts_t options, - int flags, - char *context, - md_error_t *ep -) -{ - int newargc; - char **newargv; - int i; - int ret; - int dryrun_only = 0; - - - newargv = calloc(argc+5, sizeof (char *)); - newargv[0] = "metainit"; - newargv[1] = "-s"; - newargv[2] = (*spp)->setname; - newargv[3] = "-n"; /* always do "-n" first */ - newargc = 4; - if ((options & MDCMD_DOIT) == 0) { - dryrun_only = 1; - } - if ((options & MDCMD_FORCE) != 0) { - newargv[newargc] = "-f"; - newargc++; - } - for (i = 0; i < argc; i++, newargc++) - newargv[newargc] = argv[i]; - ret = meta_mn_send_command(*spp, newargc, newargv, - flags | MD_DRYRUN | MD_NOLOG, context, ep); - - if ((dryrun_only == 0) && (ret == 0)) { - /* - * Do it for real now. Remove "-n" from the arguments and - * MD_DRYRUN from the flags. If we fail this time the master - * must panic as the mddbs may be inconsistent. - */ - newargv[3] = ""; /* this was "-n" before */ - ret = meta_mn_send_command(*spp, newargc, newargv, - flags | MD_RETRY_BUSY | MD_PANIC_WHEN_INCONSISTENT, - context, ep); - } - - free(newargv); - return (ret); -} - -static int -init_entries( - mdsetname_t **spp, - md_tab_t *tabp, - mdcmdopts_t options, - uint_t flags, - bool_t called_thru_rpc, - md_error_t *ep -) -{ - uint_t cnt = 0; - uint_t line; - int rval = 0; - int ret; - - /* for all matching entries, which haven't already been done */ - for (line = 0; (line < tabp->nlines); ++line) { - md_tab_line_t *linep = &tabp->lines[line]; - char *uname = linep->argv[0]; - - /* see if already done */ - if (linep->flags != DO_AGAIN) - continue; - - /* clear the metadev/hsp caches between inits */ - metaflushmetanames(); - - /* try it */ - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(spp, uname, ep)) { - /* - * MN set, send command to all nodes - * Note that is sp is NULL, meta_is_mn_name() derives - * sp from linep->argv which is the metadevice arg - */ - ret = mn_send_command(spp, linep->argc, linep->argv, - options, flags, linep->context, ep); - } else { - char *cname = NULL; - - cname = meta_name_getname(spp, uname, META_DEVICE, ep); - if (cname == NULL) { - mde_perror(ep, ""); - mdclrerror(ep); - } else { - - ret = meta_init_name(spp, linep->argc, - linep->argv, cname, options, ep); - Free(cname); - - if (ret != 0) { - if (!(flags & MD_IGNORE_STDERR)) { - mderrorextra(ep, linep->context); - mde_perror(ep, ""); - rval = -1; - } - mdclrerror(ep); - } - } - } - if (ret == 0) { - linep->flags = IS_DONE; - ++cnt; - } - } - - /* return success */ - if (rval != 0) - return (rval); - return (cnt); -} - -/* - * initialize all devices in set - */ -static int -init_all( - mdsetname_t **spp, - mdcmdopts_t options, - bool_t called_thru_rpc, - md_error_t *ep -) -{ - md_tab_t *tabp = NULL; - size_t setlen; - uint_t more; - int done; - int eval = -1; - - /* - * Only take the lock if this is not a MN set - * We can only enter this code for a MN set if we are the initiator - * and in this case, we don't want to take locks. - */ - if (meta_is_mn_set((*spp), ep) == 0) { - /* grab set lock */ - if (meta_lock(*spp, TRUE, ep)) { - mde_perror(ep, ""); - mdclrerror(ep); - return (eval); - } - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) { - mde_perror(ep, ""); - mdclrerror(ep); - return (eval); - } - - /* lock is held across init_entries */ - options |= MDCMD_NOLOCK; - } - - /* get md.tab, preen entries */ - if ((tabp = meta_tab_parse(NULL, ep)) == NULL) { - mde_perror(ep, ""); - mdclrerror(ep); - return (eval); - } - - setlen = strlen((*spp)->setname); - for (more = 0; (more < tabp->nlines); ++more) { - md_tab_line_t *linep = &tabp->lines[more]; - char *cname = linep->cname; - char *p; - size_t len; - - /* better have args */ - assert((linep->argc > 0) && (linep->argv[0] != NULL)); - - /* only do metadevices and hotspare pools in set */ - if (linep->type & TAB_MD_HSP) { - if ((p = strrchr(cname, '/')) == NULL) { - len = 0; - } else { - len = p - cname; - } - if ((len == setlen) && - (strncmp(cname, (*spp)->setname, len) == 0)) { - linep->flags = DO_AGAIN; - } else { - linep->flags = DONT_DO; - } - - } else { - linep->flags = DONT_DO; - } - } - - eval = 1; - - /* while more devices get made */ - do { - done = init_entries(spp, tabp, options, - MD_IGNORE_STDERR|MD_RETRY_BUSY, called_thru_rpc, ep); - } while (done > 0); - - /* now do it and report errors */ - if (init_entries(spp, tabp, options, MD_RETRY_BUSY, - called_thru_rpc, ep) >= 0) - eval = 0; /* success */ - mdclrerror(ep); - - /* cleanup, return success */ -out: - meta_tab_free(tabp); - return (eval); -} - -/* - * initialize named device or hotspare pool - */ -static int -init_name( - mdsetname_t **spp, - int argc, - char *argv[], - mdcmdopts_t options, - int called_thru_rpc, - md_error_t *ep -) -{ - md_tab_t *tabp = NULL; - md_tab_line_t *linep = NULL; - int rval = -1; - int ret; - char *uname = argv[0]; - - /* look in md.tab */ - if (argc == 1) { - /* get md.tab entries */ - if ((tabp = meta_tab_parse(NULL, ep)) == NULL) { - if (! mdissyserror(ep, ENOENT)) - return (-1); - } - - /* look in md.tab */ - if ((linep = meta_tab_find(*spp, tabp, uname, TAB_MD_HSP)) - != NULL) { - argc = linep->argc; - argv = linep->argv; - } - } - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(spp, uname, ep)) { - /* - * MN set, send command to all nodes - */ - ret = mn_send_command(spp, argc, argv, options, - MD_DISP_STDERR, NO_CONTEXT_STRING, ep); - } else { - char *cname = NULL; - - cname = meta_name_getname(spp, uname, META_DEVICE, ep); - if (cname == NULL) { - goto out; - } - - /* check for ownership */ - if (meta_check_ownership(*spp, ep) != 0) { - Free(cname); - goto out; - } - - ret = meta_init_name(spp, argc, argv, cname, options, ep); - Free(cname); - } - - if (ret != 0) { - if (linep != NULL) - mderrorextra(ep, linep->context); - goto out; - } - rval = 0; /* success */ - - /* cleanup, return error */ -out: - if (tabp != NULL) - meta_tab_free(tabp); - return (rval); -} - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ -#ifndef lint - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [-n] [-f] concat/stripe numstripes\n\ - width component... [-i interlace]\n\ - [width component... [-i interlace]] [-h hotspare_pool]\n\ - %s [-s setname] [-n] [-f] mirror -m submirror...\n\ - [read_options] [write_options] [pass_num]\n\ - %s [-s setname] [-n] [-f] RAID -r component...\n\ - [-i interlace] [-h hotspare_pool]\n\ - [-k] [-o original_column_count]\n\ - %s [-s setname] [-n] [-f] hotspare_pool [hotspare...]\n\ - %s [-s setname] [-n] [-f] softpart -p [-A alignment]\n\ - [-e] device size|all\n\ - %s [-s setname] [-n] [-f] md.tab_entry\n\ - %s [-s setname] [-n] [-f] -a\n\ - %s -r\n"), myname, myname, myname, myname, myname, myname, myname, - myname); -#endif /* ! lint */ - md_exit(sp, eval); -} - -/* - * If we fail during the attempt to take the auto-take disksets - * we need to tell the kernel to cleanup the in-core set struct - * so that we have a chance to take the set again later. - */ -static void -auto_take_cleanup(mdsetname_t *sp, side_t sideno) -{ - mddb_config_t c; - - (void) memset(&c, 0, sizeof (c)); - c.c_setno = sp->setno; - c.c_sideno = sideno; - - if (metaioctl(MD_RELEASE_SET, &c, &c.c_mde, NULL) != 0) { - mde_perror(&c.c_mde, "auto_take_cleanup"); - return; - } -} - -/* - * Take the diskset. - * - * This is a clean auto-take set, so do the work to take it. - * This is a streamlined version of the code in meta_set_take. We avoid the - * need for talking to the rpc.metad since that can't run this early during the - * boot. We don't need to talk to the metad for this diskset since we're the - * only host in the set. - */ -static void -take_set(md_set_record *sr) -{ - mdsetname_t sn; - md_drive_desc *dd; - md_error_t error = mdnullerror; - md_replicalist_t *rlp = NULL; - md_replicalist_t *rl; - daddr_t nblks = 0; - md_drive_record *dr; - side_t sideno; - - /* - * Several of the functions we call take a sp param so - * construct one from the set record. - */ - sn.setname = sr->sr_setname; - sn.setno = sr->sr_setno; - sn.setdesc = sr2setdesc(sr); - sn.lockfd = MD_NO_LOCK; - - if (sr->sr_flags & MD_SR_MB_DEVID) - dd = metaget_drivedesc(&sn, MD_BASICNAME_OK | PRINT_FAST, - &error); - else - dd = metaget_drivedesc(&sn, MD_BASICNAME_OK, &error); - - if (dd == NULL) { - mde_perror(&error, ""); - mdclrerror(&error); - return; - } - - /* - * Skip call to tk_own_bydd. This talks to rpc.metamhd (which we can't - * do yet) and is not needed for auto-take disksets since we are not - * doing SCSI reservations on these drives. - */ - - if (setup_db_bydd(&sn, dd, 0, &error) != 0) { - if (! mdismddberror(&error, MDE_DB_ACCOK) && - ! mdismddberror(&error, MDE_DB_TAGDATA)) { - /* - * Skip call to rel_own_bydd since that really just - * calls rpc.metamhd which we don't need to do, - * so there really isn't anything to rollback here. - */ - mde_perror(&error, ""); - mdclrerror(&error); - return; - } - mdclrerror(&error); - } - - if ((sideno = getmyside(&sn, &error)) == MD_SIDEWILD) { - mde_perror(&error, ""); - return; - } - - if (snarf_set(&sn, FALSE, &error) != 0) { - if (mdismddberror(&error, MDE_DB_STALE) || - mdismddberror(&error, MDE_DB_TAGDATA) || - ! mdismddberror(&error, MDE_DB_NODB) && - ! mdismddberror(&error, MDE_DB_NOTOWNER)) { - /* - * rollback - * Normally MDE_DB_STALE or MDE_DB_TAGDATA - * would still keep the set but in this case we don't - * want to do that. This will probably result in the - * boot going in to single-user since we won't have the - * set so any attempted mounts using the set's metadevices - * will fail. However, that is a "good thing" so the - * sysadmin can fix the set. Normally they would see - * all of these problems when they ran the take and be - * able to immediately fix the problem. - */ - mde_perror(&error, ""); - auto_take_cleanup(&sn, sideno); - return; - } - } - - /* - * Call metareplicalist and upd_dr_dbinfo. - * Most of that code is only needed to synchronize amongst the multiple - * hosts in a set, which is not applicable in our case. But we do a - * subset here to handle the case when the user had been - * adding/deleting/balancing mddbs when this node panic'd. We are - * synchronizing the ondisk mddbs to the list of drive records stored - * in the local mddb. - */ - if (metareplicalist(&sn, (MD_BASICNAME_OK | PRINT_FAST), &rlp, &error) - < 0) { - /* rollback */ - mde_perror(&error, ""); - auto_take_cleanup(&sn, sideno); - return; - } - - /* - * The following code is equivalent to upd_dr_dbinfo for syncronizing - * the local host only. That function is normally run through the - * metad with a local and daemon side but we'll do all of the work - * here. - */ - - /* find the smallest existing replica */ - for (rl = rlp; rl != NULL; rl = rl->rl_next) { - md_replica_t *r; - - r = rl->rl_repp; - nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); - } - - if (nblks <= 0) - nblks = MD_DBSIZE; - - for (dr = sr->sr_drivechain; dr; dr = dr->dr_next) { - int dbcnt; - mddrivename_t *dnp; - md_replicalist_t *rl; - - /* - * The cname style for dnp and replica list will be same since - * both use the the same flags MD_BASICNAME_OK|PRINT_FAST which - * will always provide the cached value. - */ - if ((dnp = metadrivename_withdrkey(&sn, sideno, dr->dr_key, - MD_BASICNAME_OK | PRINT_FAST, &error)) == NULL) { - mde_perror(&error, ""); - metafreereplicalist(rlp); - auto_take_cleanup(&sn, sideno); - return; - } - - dbcnt = 0; - /* see how many replicas are on this drive */ - for (rl = rlp; rl != NULL; rl = rl->rl_next) { - if (strcmp(rl->rl_repp->r_namep->drivenamep->cname, dnp->cname) - == 0) - dbcnt++; - } - - /* Adjust the fields in the copy */ - dr->dr_dbcnt = dbcnt; - dr->dr_dbsize = dbcnt > 0 ? nblks : 0; - } - - /* - * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e - * the drives in the set don't have the device id information, - * then stick it in if possible. - * - * If updating the master block fails for whatever reason, it's - * okay. It just means the disk(s) in the diskset won't be self - * identifying. - */ - if (!(sr->sr_flags & MD_SR_MB_DEVID)) { - if (meta_update_mb(&sn, dd, &error) == 0) { - sr->sr_flags |= MD_SR_MB_DEVID; - mdclrerror(&error); - } - } - - commitset(sr, FALSE, &error); - - metafreereplicalist(rlp); - - /* - * This finishes up the logical equivalent of meta_set_take. - */ - if (meta_resync_all(&sn, MD_DEF_RESYNC_BUF_SIZE, &error) != 0) { - mde_perror(&error, ""); - mdclrerror(&error); - } -} - -/* - * Take the disksets that are marked to be taken at boot time. - */ -static void -auto_take_sets() -{ - int max_sets; - int i; - md_error_t error = mdnullerror; - char *hostname; - - if ((max_sets = get_max_sets(&error)) == 0) - return; - - if (!mdisok(&error)) { - mde_perror(&error, ""); - return; - } - - /* set up so auto-take errors also go to syslog */ - openlog("metainit", LOG_ODELAY, LOG_USER); - metasyslog = 1; - - hostname = mynode(); - - /* - * For each possible set number (skip set 0 which is the unnamed local - * set), see if we really have a diskset. If so, check if auto-take - * is enabled. - * - * In order to take the set it must have drives and it must not be - * stuck in mid-add. The sr_validate routine within rpc.metad will - * delete sets that are in mid-add when it runs. - */ - for (i = 1; i < max_sets; i++) { - md_set_record *sr; - - if ((sr = metad_getsetbynum(i, &error)) == NULL) { - mdclrerror(&error); - continue; - } - - if (sr->sr_flags & MD_SR_AUTO_TAKE && !(sr->sr_flags & MD_SR_ADD)) { - int j; - int cnt = 0; - int host_mismatch = 0; - int take = 0; - md_drive_record *dr; - - /* check for host renames or multiple hosts in set */ - for (j = 0; j < MD_MAXSIDES; j++) { - /* Skip empty slots */ - if (sr->sr_nodes[j][0] == '\0') - continue; - - cnt++; - if (strcmp(sr->sr_nodes[j], hostname) != 0) - host_mismatch = 1; - } - - /* paranoid check that we're the only host in the set */ - if (cnt > 1) { - md_eprintf(gettext( - "diskset %s: auto-take enabled and multiple hosts in set\n"), - sr->sr_setname); - continue; - } - - if (host_mismatch) { - /* The host was renamed, repair the set. */ - for (j = 0; j < MD_MAXSIDES; j++) { - /* Skip empty slots */ - if (sr->sr_nodes[j][0] == '\0') - continue; - - (void) strncpy(sr->sr_nodes[j], hostname, - sizeof (sr->sr_nodes[j])); - commitset(sr, FALSE, &error); - if (!mdisok(&error)) { - mde_perror(&error, ""); - mdclrerror(&error); - } else { - md_eprintf(gettext( - "new hostname %s, update auto-take diskset %s\n"), - hostname, sr->sr_setname); - } - break; - } - } - - /* set must have at least one drive to be taken */ - for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) { - /* ignore drives in mid-add */ - if (!(dr->dr_flags & MD_DR_ADD)) { - take = 1; - break; - } - } - - if (take) - take_set(sr); - else - md_eprintf(gettext( - "diskset %s: auto-take enabled but set has no drives\n"), - sr->sr_setname); - } - } -} - -/* - * mainline. crack command line arguments. - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = MD_LOCAL_NAME; - mdsetname_t *sp = NULL; - enum action { - NONE, - INIT, - ALL - } todo = NONE; - mdcmdopts_t options = (MDCMD_DOIT | MDCMD_PRINT); - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - - md_error_t dummystatus = mdnullerror; - md_error_t *dummyep = &dummystatus; - int eval = 1; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - pid_t pid; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - if ((cp = strstr(argv[0], ".rpc_call")) != NULL) { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } else { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "afhnrs:?")) != -1) { - switch (c) { - - /* help */ - case 'h': - usage(sp, 0); - break; - - /* set name */ - case 's': - sname = optarg; - break; - - /* all devices in md.tab */ - case 'a': - if (todo != NONE) - usage(sp, 1); - todo = ALL; - options |= MDCMD_ALLOPTION; - break; - /* check for validity, but don't really init */ - case 'n': - options &= ~MDCMD_DOIT; - break; - - /* for recovery */ - case 'r': - if (todo != NONE) - usage(sp, 1); - todo = INIT; - break; - - /* mounted and swapped components are OK */ - case 'f': - options |= MDCMD_FORCE; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - - /* sname is MD_LOCAL_NAME if not specified on the command line */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - argc -= optind; - argv += optind; - if (todo == NONE) { - if (argc <= 0) { - usage(sp, 1); - } - } else if (argc > 0) { - usage(sp, 1); - } - - - /* setup database locations */ - if (meta_setup_db_locations(ep) != 0) { - mde_perror(ep, ""); - if (mdismddberror(ep, MDE_DB_STALE)) - md_exit(sp, 66); - if (! mdiserror(ep, MDE_MDDB_CKSUM)) /* relatively benign */ - md_exit(sp, 1); - } - if (todo == INIT) { /* load and take auto-take sets */ - auto_take_sets(); - - /* - * During the boot sequence we need to update the mediator - * records, however this depends upon the rpc.metamedd - * running. So, in order to not introduce a delay in the - * boot time, fork a new process to do this work in the - * background. - */ - pid = fork1(); - if (pid == (pid_t)-1) { - /* - * We could not fork a child process to udpate mediator - * information on this node. There is no need to panic. - * We shall simply return 1. - */ - mde_perror(ep, "Could not fork a child process to" - " update mediator record"); - md_exit(sp, 1); - } else if (pid == (pid_t)0) { - /* child */ - if (meta_mediator_info_from_file(NULL, 0, ep) == 1) { - /* - * No need to print any error messages. - * All the errors messages are printed in the - * library routine itself. - */ - md_exit(sp, 1); - } else { - md_exit(sp, 0); - } - } else { - /* Parent process */ - md_exit(sp, 0); - } - } else if (todo == ALL) { /* initialize all devices in md.tab */ - eval = init_all(&sp, options, called_thru_rpc, ep); - } else { /* initialize the named device */ - eval = 0; - if (init_name(&sp, argc, argv, options, called_thru_rpc, - ep) != 0) { - /* - * If we're dealing with MN metadevices and we are - * directly called, then the appropriate error message - * has already been displayed. So just exit. - */ - if (meta_is_mn_set(sp, dummyep) && (!called_thru_rpc)) { - md_exit(sp, 1); - } - mde_perror(ep, ""); - mdclrerror(ep); - eval = 1; - goto nomdcf; - } - } - -domdcf: - /* update md.cf, return success */ - if (meta_update_md_cf(sp, ep) != 0) { - mde_perror(ep, ""); - eval = 1; - } - -nomdcf: - md_exit(sp, eval); - /*NOTREACHED*/ - return (eval); -} diff --git a/usr/src/cmd/lvm/util/metainit.xml b/usr/src/cmd/lvm/util/metainit.xml deleted file mode 100644 index 0571b52b5bee..000000000000 --- a/usr/src/cmd/lvm/util/metainit.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/util/metaoffline.c b/usr/src/cmd/lvm/util/metaoffline.c deleted file mode 100644 index 6fd85fe5b495..000000000000 --- a/usr/src/cmd/lvm/util/metaoffline.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * offline sub-mirror - */ - -#include - -#include -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [-f] mirror submirror\n"), - myname); - md_exit(sp, eval); -} - -/* - * Metaoffline: to offline a metadevice - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdcmdopts_t options = (MDCMD_PRINT); - mdname_t *mirnp; - mdname_t *submirnp; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - int origargc = argc; - char **origargv = argv; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:f?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - if (argc != 2) - usage(sp, 1); - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* get names */ - if (((mirnp = metaname(&sp, argv[0], META_DEVICE, ep)) == NULL) || - ((submirnp = metaname(&sp, argv[1], META_DEVICE, ep)) == NULL)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - assert(sp != NULL); - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[0], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[0] which is the metadevice arg - * If this fails, the master must panic as the mddb may be - * inconsistent. - */ - int result; - result = meta_mn_send_command(sp, origargc, origargv, - MD_DISP_STDERR | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - md_exit(sp, result); - } - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* offline submirror */ - if (meta_mirror_offline(sp, mirnp, submirnp, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaonline.c b/usr/src/cmd/lvm/util/metaonline.c deleted file mode 100644 index b0cd6724173e..000000000000 --- a/usr/src/cmd/lvm/util/metaonline.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * online sub-mirrors - */ - -#include -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] mirror submirror\n"), - myname); - md_exit(sp, eval); -} - -/* - * Metaonline: to online a metadevice - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdcmdopts_t options = (MDCMD_PRINT); - mdname_t *mirnp; - mdname_t *submirnp; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - int origargc = argc; - char **origargv = argv; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - if (argc != 2) - usage(sp, 1); - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* get names */ - if (((mirnp = metaname(&sp, argv[0], META_DEVICE, ep)) == NULL) || - ((submirnp = metaname(&sp, argv[1], META_DEVICE, ep)) == NULL)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - assert(sp != NULL); - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[0], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[0] which is the metadevice arg - * If this fails, the master must panic as the mddb may be - * inconsistent. - */ - int result; - result = meta_mn_send_command(sp, origargc, origargv, - MD_DISP_STDERR | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - /* - * Unlike non-MN sets, the metaonline command does not actually - * start a resync, it simply updates the state on all of the - * nodes. Therefore, to start a resync we send a resync starting - * message for the metadevice - */ - if (result == 0) { - if ((result = meta_mn_send_resync_starting(mirnp, ep)) - != 0) - mde_perror(ep, "Unable to start resync"); - } - md_exit(sp, result); - } - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* online submirror */ - if (meta_mirror_online(sp, mirnp, submirnp, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaparam.c b/usr/src/cmd/lvm/util/metaparam.c deleted file mode 100644 index 26754a049f8b..000000000000 --- a/usr/src/cmd/lvm/util/metaparam.c +++ /dev/null @@ -1,479 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * change metadevice parameters - */ - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [options] concat/stripe | RAID\n\ - %s [-s setname] [options] mirror\n\ -\n\ -Concat/Stripe or RAID options:\n\ --h hotspare_pool | \"none\"\n\ -\n\ -Mirror options:\n\ --r roundrobin | geometric | first\n\ --w parallel | serial\n\ --p 0-%d\n"), myname, myname, MD_PASS_MAX); - - md_exit(sp, eval); -} - -/* - * do mirror parameters - */ -static int -mirror_params( - mdsetname_t *sp, - mdname_t *mirnp, - int argc, - char *argv[], - md_error_t *ep -) -{ - mm_params_t mmp; - int modified = 0; - int c; - - /* we must have a set */ - assert(sp != NULL); - assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev))); - - /* initialize */ - (void) memset(&mmp, '\0', sizeof (mmp)); - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:r:w:p:")) != -1) { - switch (c) { - case 's': - break; - - case 'r': - if (name_to_rd_opt(mirnp->cname, optarg, - &mmp.read_option, ep) != 0) { - return (-1); - } - mmp.change_read_option = 1; - modified = 1; - break; - - case 'w': - if (name_to_wr_opt(mirnp->cname, optarg, - &mmp.write_option, ep) != 0) { - return (-1); - } - mmp.change_write_option = 1; - modified = 1; - break; - - case 'p': - if (name_to_pass_num(mirnp->cname, optarg, - &mmp.pass_num, ep) != 0) { - return (-1); - } - mmp.change_pass_num = 1; - modified = 1; - break; - - default: - usage(sp, 1); - /*NOTREACHED*/ - break; - } - } - - argc -= optind; - argv += optind; - if (argc != 1) - usage(sp, 1); - - /* if just printing */ - if (! modified) { - if (meta_mirror_get_params(sp, mirnp, &mmp, ep) != 0) - return (-1); - (void) printf( - gettext( - "%s: Mirror current parameters are:\n"), - mirnp->cname); - if (meta_print_mirror_options(mmp.read_option, - mmp.write_option, mmp.pass_num, 0, NULL, - sp, stdout, ep) != 0) { - return (-1); - } - } - - /* otherwise, change parameters */ - else { - if (meta_mirror_set_params(sp, mirnp, &mmp, ep) != 0) - return (-1); - - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) - return (-1); - } - - /* return success */ - return (0); -} - -/* - * do stripe parameters - */ -static int -stripe_params( - mdsetname_t *sp, - mdname_t *stripenp, - int argc, - char *argv[], - md_error_t *ep -) -{ - ms_params_t msp; - int modified = 0; - mdhspname_t *hspnp; - int c; - - /* we must have a set */ - assert(sp != NULL); - assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev))); - - /* initialize */ - (void) memset(&msp, '\0', sizeof (msp)); - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:h:")) != -1) { - switch (c) { - case 's': - break; - - case 'h': - if (meta_is_none(optarg)) { - msp.hsp_id = MD_HSP_NONE; - } else if ((hspnp = metahspname(&sp, optarg, - ep)) == NULL) { - return (-1); - } else if (metachkhsp(sp, hspnp, ep) != 0) { - return (-1); - } else { - msp.hsp_id = hspnp->hsp; - } - msp.change_hsp_id = 1; - modified = 1; - break; - - default: - usage(sp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind; - argv += optind; - if (argc != 1) - usage(sp, 1); - - /* if just printing */ - if (! modified) { - if (meta_stripe_get_params(sp, stripenp, &msp, ep) != 0) - return (-1); - if (msp.hsp_id == MD_HSP_NONE) - hspnp = NULL; - else if ((hspnp = metahsphspname(&sp, msp.hsp_id, ep)) == NULL) - return (-1); - (void) printf(gettext( - "%s: Concat/Stripe current parameters are:\n"), - stripenp->cname); - if (meta_print_stripe_options(hspnp, NULL, stdout, ep) != 0) - return (-1); - } - - /* otherwise, change parameters */ - else { - if (meta_stripe_set_params(sp, stripenp, &msp, ep) != 0) - return (-1); - - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) - return (-1); - } - - /* return success */ - return (0); -} - -/* - * do raid parameters - */ -static int -raid_params( - mdsetname_t *sp, - mdname_t *raidnp, - int argc, - char *argv[], - md_error_t *ep -) -{ - mr_params_t msp; - int modified = 0; - mdhspname_t *hspnp; - int c; - - /* we must have a set */ - assert(sp != NULL); - assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); - - /* initialize */ - (void) memset(&msp, '\0', sizeof (msp)); - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:h:")) != -1) { - switch (c) { - case 's': - break; - - case 'h': - if (meta_is_none(optarg)) { - msp.hsp_id = MD_HSP_NONE; - } else if ((hspnp = metahspname(&sp, optarg, - ep)) == NULL) { - return (-1); - } else if (metachkhsp(sp, hspnp, ep) != 0) { - return (-1); - } else { - msp.hsp_id = hspnp->hsp; - } - msp.change_hsp_id = 1; - modified = 1; - break; - - default: - usage(sp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind; - argv += optind; - if (argc != 1) - usage(sp, 1); - - /* if just printing */ - if (! modified) { - if (meta_raid_get_params(sp, raidnp, &msp, ep) != 0) - return (-1); - if (msp.hsp_id == MD_HSP_NONE) - hspnp = NULL; - else if ((hspnp = metahsphspname(&sp, msp.hsp_id, ep)) == NULL) - return (-1); - (void) printf(gettext( - "%s: RAID current parameters are:\n"), - raidnp->cname); - if (meta_print_raid_options(hspnp, NULL, stdout, ep) != 0) - return (-1); - } - - /* otherwise, change parameters */ - else { - if (meta_raid_set_params(sp, raidnp, &msp, ep) != 0) - return (-1); - - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) - return (-1); - } - - /* return success */ - return (0); -} - -/* - * parse args and doit - */ -int -main( - int argc, - char **argv -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdname_t *np; - char *miscname; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - char *firstarg = NULL; - - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* find set and metadevice first */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:h:p:r:w:o:?")) != -1) { - switch (c) { - case 's': - sname = optarg; - break; - case 'h': - firstarg = optarg; - break; - case '?': - if (optopt == '?') - usage(sp, 0); - break; - } - } - if ((argc - optind) <= 0) - usage(sp, 1); - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - if (firstarg == NULL) - firstarg = argv[optind]; - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, firstarg, ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from firstarg which is the metadevice arg - * If this fails, the master must panic as the mddb may be - * inconsistent - */ - int result; - result = meta_mn_send_command(sp, argc, argv, MD_DISP_STDERR | - MD_PANIC_WHEN_INCONSISTENT, NO_CONTEXT_STRING, ep); - /* No further action required */ - md_exit(sp, result); - } - - if ((np = metaname(&sp, argv[optind], META_DEVICE, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - assert(sp != NULL); - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if ((miscname = metagetmiscname(np, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* dispatch based on device type */ - if (strcmp(miscname, MD_STRIPE) == 0) { - if (stripe_params(sp, np, argc, argv, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_MIRROR) == 0) { - if (mirror_params(sp, np, argc, argv, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_RAID) == 0) { - if (raid_params(sp, np, argc, argv, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else { - md_eprintf(gettext( - "%s: invalid metadevice type %s\n"), - np->cname, miscname); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metarecover.c b/usr/src/cmd/lvm/util/metarecover.c deleted file mode 100644 index 17e2ff90d624..000000000000 --- a/usr/src/cmd/lvm/util/metarecover.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Recover metadevice configurations that have been lost by scanning - * media, intelligent guessing, or other means. - */ - -#include -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext( - "usage: %s [-s setname] [-v] raw-device -p\n"), myname); - (void) fprintf(stderr, gettext( - " %s [-s setname] [-v] [-n] raw-device -p -d\n"), myname); - (void) fprintf(stderr, gettext( - " %s [-s setname] [-v] [-n] raw-device -p -m\n"), myname); - - md_exit(sp, eval); -} - -int -main( - int argc, - char *argv[] -) -{ - char *sname = MD_LOCAL_NAME; - mdcmdopts_t options = (MDCMD_DOIT | MDCMD_PRINT); - - mdsetname_t *sp = NULL; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdname_t *namep; - char *devname; - int error; - int c; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if (sdssc_bind_library() == SDSSC_ERROR) { - (void) printf(gettext( - "%s: Interface error with libsds_sc.so\n"), argv[0]); - exit(1); - } - - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit((mdsetname_t *)NULL, 1); - } - - /* parse arguments */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:hnv?")) != -1) { - switch (c) { - case 's': - sname = optarg; - break; - - case 'h': - usage(sp, 0); - break; - - case 'v': - options |= MDCMD_VERBOSE; - break; - - case 'n': - options &= ~MDCMD_DOIT; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - - /* sname is MD_LOCAL_NAME if not specified on the command line */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if ((argc == 0) || (argv[0] == NULL)) { - usage(sp, 1); - } - - /* get raw device name */ - devname = Strdup(argv[0]); - argv++; - argc--; - - if ((namep = metaname(&sp, devname, UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for a valid component */ - if ((metagetsize(namep, ep) == MD_DISKADDR_ERROR)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - assert(sp != NULL); - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* - * If the component is not a metadevice and we have a named set - * make sure that the component is part of the named set. - */ - if (strcmp(sp->setname, MD_LOCAL_NAME) != 0) { - if (!metaismeta(namep)) { - if (! meta_is_drive_in_thisset(sp, namep->drivenamep, - FALSE, ep)) { - (void) mddeverror(ep, MDE_NOT_IN_SET, - namep->dev, namep->cname); - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - } - - /* parse command line -- currently only soft partitions are supported */ - if ((argc > 0) && (*argv != NULL) && strncmp(*argv, "-p", 2) == 0) { - error = meta_recover_sp(sp, namep, --argc, ++argv, options, ep); - } else { - usage(sp, 1); - } - - if (error < 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } else { - if (meta_update_md_cf(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metarename.c b/usr/src/cmd/lvm/util/metarename.c deleted file mode 100644 index 22f3ead80373..000000000000 --- a/usr/src/cmd/lvm/util/metarename.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * rename or exchange metadevice identity - */ - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [-f] [-x] metadevice1 metadevice2\n\ - %s -h\n\ -options:\n\ --s operations are done on the set setname, rather than the local set\n\ --f force exchange or rename\n\ --x exchange the identities of metadevice1 and metadevice2\n\ --h help: print this message\n"), myname, myname); - md_exit(sp, eval); -} - -/* - * mainline. crack command line arguments. - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - int xflag = 0; - mdcmdopts_t options = (MDCMD_PRINT | MDCMD_DOIT); - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int rc = 0; - mdname_t *mdnms[2]; - int c, i; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - int origargc = argc; - char **origargv = argv; - char *miscname; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "fns:xh?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case 'x': - ++xflag; - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - case 'n': - if (called_thru_rpc == TRUE) { - options &= ~MDCMD_DOIT; - } else { - usage(sp, 1); - } - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - if (argc != 2) { - usage(sp, 1); - } - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[0], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[0] which is the metadevice arg - */ - int result; - int i; - int newargc; - char **newargv; - - /* - * For MN sets we start a dryrun version of this command - * before sending out the real version. - * Thus we need a new array for the arguments as the first - * one will be -n to indicate the dryrun - */ - newargv = calloc(origargc+1, sizeof (char *)); - newargv[0] = "metarename"; - newargv[1] = "-n"; /* always do "-n" first */ - newargc = 2; - for (i = 1; i < origargc; i++, newargc++) - newargv[newargc] = origargv[i]; - - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_DRYRUN, NO_CONTEXT_STRING, ep); - - /* If we found a problem don't do it for real */ - if (result != 0) { - md_exit(sp, result); - } - - /* - * Do it for real now. Remove "-n" from the arguments and - * MD_DRYRUN from the flags. If this fails, the master must - * panic as the mddb may be inconsistent. - */ - newargv[1] = ""; /* this was "-n" before */ - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_RETRY_BUSY | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - free(newargv); - - md_exit(sp, result); - } - - for (i = 0; i < 2; i++) { - if (!is_metaname(argv[i])) { - /* - * one of the input devices is not a valid - * metadevice name - */ - usage(sp, 1); - } - if (i == 1 && !xflag) { - /* rename, create dest metadevice name */ - if (meta_init_make_device(&sp, argv[i], ep) <= 0) { - mde_perror(ep, argv[i]); - md_exit(sp, 1); - } - } - - if ((mdnms[i] = metaname(&sp, argv[i], - META_DEVICE, ep)) == NULL) { - mde_perror(ep, argv[i]); - md_exit(sp, 1); - } - } - - /* - * The FORCE option is only valid for a trans metadevice, clear it if - * it is not trans - */ - if ((miscname = metagetmiscname(mdnms[0], ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (strcmp(miscname, MD_TRANS) != 0) { - options &= ~MDCMD_FORCE; - } - - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (xflag) { - rc = meta_exchange(sp, mdnms[0], mdnms[1], options, ep); - } else { - rc = meta_rename(sp, mdnms[0], mdnms[1], options, ep); - } -out: - if (rc != 0 || !mdisok(ep)) { - mde_perror(ep, ""); - } - md_exit(sp, rc); - /*NOTREACHED*/ - return (rc); -} diff --git a/usr/src/cmd/lvm/util/metareplace.c b/usr/src/cmd/lvm/util/metareplace.c deleted file mode 100644 index 3e69ace6290c..000000000000 --- a/usr/src/cmd/lvm/util/metareplace.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * replace mirror component - */ - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] mirror component-old component-new\n\ - %s [-s setname] -e mirror component\n\ - %s [-s setname] [-f] RAID component-old component-new\n\ - %s [-s setname] [-f] -e RAID component\n"), - myname, myname, myname, myname); - md_exit(sp, eval); -} - -/* - * online replace a physical disk in a metamirror - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdcmdopts_t options = (MDCMD_PRINT|MDCMD_DOIT); - mdname_t *namep; - int eflag = 0; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - char *uname = NULL; - bool_t called_thru_rpc = FALSE; - char *cp; - int origargc = argc; - char **origargv = argv; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse arguments */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hs:efn?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case 'e': - ++eflag; - break; - - case 'f': - options |= MDCMD_FORCE; - break; - - case 'n': - if (called_thru_rpc == TRUE) { - options &= ~MDCMD_DOIT; - } else { - usage(sp, 1); - } - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* get device */ - if (argc < 1) - usage(sp, 1); - - uname = argv[0]; - - if (((namep = metaname(&sp, uname, META_DEVICE, ep)) == NULL)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (metachkmeta(namep, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - assert(sp != NULL); - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[0], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[0] which is the metadevice arg - */ - int i; - int newargc; - int result; - char *miscname; - char **newargv; - - if ((miscname = metagetmiscname(namep, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - newargv = calloc(origargc+1, sizeof (char *)); - newargv[0] = "metareplace"; - newargv[1] = "-n"; /* always do "-n" first */ - newargc = 2; - for (i = 1; i < origargc; i++, newargc++) { - newargv[newargc] = origargv[i]; - } - - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_DRYRUN, NO_CONTEXT_STRING, ep); - - /* If we've found a problem don't do it for real */ - if (result != 0) { - md_exit(sp, result); - } - /* - * Do it for real now. Remove "-n" from the arguments and - * MD_DRYRUN from the flags. If this fails, the master must - * panic as the mddbs may be inconsistent. - */ - newargv[1] = ""; /* this was "-n" before */ - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_RETRY_BUSY | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - - free(newargv); - - /* - * if the metareplace command succeeds for a mirror, send a - * resync starting message for the metadevice - */ - if ((result == 0) && (strcmp(miscname, MD_MIRROR) == 0)) { - if ((result = meta_mn_send_resync_starting(namep, ep)) - != 0) - mde_perror(ep, "Unable to start resync"); - } - md_exit(sp, result); - } - - --argc, ++argv; - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (eflag) { /* enable component */ - mdname_t *compnp; - - if (argc != 1) - usage(sp, 1); - - if ((compnp = metaname(&sp, argv[0], UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (meta_enable_byname(sp, namep, compnp, options, ep) - != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else { /* replace component */ - mdname_t *oldnp; - mdname_t *newnp; - - if (argc != 2) - usage(sp, 1); - - if ((oldnp = metaname(&sp, argv[0], UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if ((newnp = metaname(&sp, argv[1], UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (meta_replace_byname(sp, namep, oldnp, newnp, - options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* update md.cf */ - if (meta_update_md_cf(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaroot.c b/usr/src/cmd/lvm/util/metaroot.c deleted file mode 100644 index d523e46b3c91..000000000000 --- a/usr/src/cmd/lvm/util/metaroot.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * patch system files for root on metadevice - */ - -#include -#include -#include - -#define METAROOT_OK 0 -#define METAROOT_ERR -1 -#define METAROOT_NOTFOUND -2 - -struct def_map { - char **dm_fname; /* Location of file name */ - char *dm_default; /* Default name */ -}; - -/* - * options - */ -static char *cname = NULL; /* take default */ -static char *sname = NULL; /* take default */ -static char *vname = NULL; /* take default */ -static char *dbname = NULL; /* take default bootlist location */ -static int doit = 1; -static int verbose = 0; - -/* - * Map of default system file names to the place where they are stored. - * This is used if the -R option is specified. Note that the members of - * the map point to the cname, sname, vname and dbname global variables - * above. These global variables are used in the call to - * meta_patch_rootdev() in main(). - */ -static struct def_map default_names[] = { - &cname, META_DBCONF, - &sname, "/etc/system", - &vname, "/etc/vfstab", - &dbname, "/kernel/drv/md.conf" -}; - -static int validate_stripe_root(); - -/* - * print usage message, md_exit - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage:\t%s [-n] [-k system-name] [-m md.conf-name] [-v vfstab-name] \\\n\ -\t\t[-c mddb.cf-name] device\n\ -\t%s [-n] [-R root-path] device\n"), - myname, myname); - md_exit(sp, eval); -} - -static void -free_mem() -{ - int i; - struct def_map *map; - - for (i = 0, map = default_names; - i < sizeof (default_names) / sizeof (struct def_map); - i++, map++) { - if (*map->dm_fname != NULL) { - free((void *) *map->dm_fname); - *map->dm_fname = NULL; - } - } -} - -/* - * Check if mirror, mirnp, is a valid root filesystem, ie all - * submirrors must be single disk stripe, and that the slice, slicenp, - * if not NULL, is a component of one of the submirrors. - * The arg metaroot is TRUE if mirnp is the current root filesystem. - * Returns: - * METAROOT_OK if mirror is valid and slicenp is a component - * METAROOT_NOTFOUND if mirror valid but slicenp not a component - * METAROOT_ERR if mirror not a valid root - */ -static int -validate_mirror_root( - mdsetname_t *sp, - mdname_t *mirnp, - mdname_t *slicenp, - int metaroot, - md_error_t *ep -) -{ - int smi; - md_mirror_t *mirrorp; - char *miscname; - int found = 0; - int rval; - int err = 0; - - if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL) { - mde_perror(ep, ""); - return (METAROOT_ERR); - } - - for (smi = 0; (smi < NMIRROR); ++smi) { - /* Check all submirrors */ - md_submirror_t *mdsp = &mirrorp->submirrors[smi]; - mdname_t *submirnamep = mdsp->submirnamep; - - /* skip unused submirrors */ - if (submirnamep == NULL) { - assert(mdsp->state == SMS_UNUSED); - continue; - } - if ((miscname = metagetmiscname(submirnamep, ep)) == NULL) { - return (mdmderror(ep, MDE_UNKNOWN_TYPE, - meta_getminor(submirnamep->dev), - submirnamep->cname)); - } - if (strcmp(miscname, MD_STRIPE) != 0) { - md_eprintf(gettext("Submirror is not a stripe\n")); - return (METAROOT_ERR); - } - rval = validate_stripe_root(sp, submirnamep, slicenp, - metaroot, ep); - switch (rval) { - case METAROOT_OK: - found = 1; - break; - case METAROOT_ERR: - err++; - break; - case METAROOT_NOTFOUND: - default: - break; - } - } - if (err > 0) - return (METAROOT_ERR); - if (!found) - return (METAROOT_NOTFOUND); - return (METAROOT_OK); -} - -/* - * Check if stripe, strnp, is a valid root filesystem, ie must - * be single disk stripe, and the the slice, slicenp, if not NULL, must - * be a component of this stripe. - * The arg metaroot is TRUE if strnp is the current root filesystem. - * Returns: - * METAROOT_OK if stripe is valid and slicenp is a component - * METAROOT_NOTFOUND if stripe valid but slicenp not a component - * METAROOT_ERR if stripe not a valid root - */ -static int -validate_stripe_root( - mdsetname_t *sp, - mdname_t *strnp, - mdname_t *slicenp, - int metaroot, - md_error_t *ep -) -{ - md_stripe_t *stripep; - md_row_t *rp; - md_comp_t *cp; - - if ((stripep = meta_get_stripe(sp, strnp, ep)) == NULL) { - mde_perror(ep, ""); - return (METAROOT_ERR); - } - if (stripep->rows.rows_len != 1) { - md_eprintf(gettext( - "Concat %s has more than 1 slice\n"), strnp->cname); - return (METAROOT_ERR); - } - rp = &stripep->rows.rows_val[0]; - - if (rp->comps.comps_len != 1) { - md_eprintf(gettext( - "Stripe %s has more than 1 slice\n"), strnp->cname); - return (METAROOT_ERR); - } - cp = &rp->comps.comps_val[0]; - if (!metaismeta(cp->compnamep)) { - if (slicenp == NULL) - return (METAROOT_OK); - if (strcmp(slicenp->cname, cp->compnamep->cname) == 0) - return (METAROOT_OK); - if (!metaroot) { - md_eprintf(gettext( - "Root %s is not a component of metadevice %s\n"), - slicenp->cname, strnp->cname); - } - return (METAROOT_NOTFOUND); - } - md_eprintf(gettext( - "Component %s is not a stripe\n"), cp->compnamep->cname); - return (METAROOT_ERR); -} - -/* - * Check if the device devnp is valid. It must be a component of the - * metadevice that contains the root filesystem - */ - -static int -validate_root_device( - mdsetname_t *sp, - mdname_t *devnp, - md_error_t *ep -) -{ - mdname_t *rootnp; - char *curroot; - char *miscname; - int rval; - - if ((curroot = meta_get_current_root(ep)) == NULL) { - mde_perror(ep, ""); - return (METAROOT_ERR); - } - if ((rootnp = metaname(&sp, curroot, UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - return (METAROOT_ERR); - } - - if (metaismeta(rootnp)) { - /* get type */ - if ((miscname = metagetmiscname(rootnp, ep)) == NULL) { - mde_perror(ep, ""); - return (METAROOT_ERR); - } - if (strcmp(miscname, MD_MIRROR) == 0) { - if ((rval = validate_mirror_root(sp, rootnp, - devnp, 1, ep)) == METAROOT_OK) - return (METAROOT_OK); - if (rval == METAROOT_NOTFOUND) { - md_eprintf(gettext( - "Slice %s is not a component of root %s\n"), - devnp->cname, rootnp->cname); - } - return (METAROOT_ERR); - } else if (strcmp(miscname, MD_STRIPE) == 0) { - if ((rval = validate_stripe_root(sp, rootnp, - devnp, 1, ep)) == METAROOT_OK) - return (METAROOT_OK); - if (rval == METAROOT_NOTFOUND) { - md_eprintf(gettext( - "Slice %s is not a component of root %s\n"), - devnp->cname, rootnp->cname); - } - return (METAROOT_ERR); - } else { - md_eprintf(gettext( - "Root metadevice, %s, is not a Slice or Mirror\n"), - rootnp->cname); - return (METAROOT_ERR); - } - } else { - md_eprintf(gettext( - "Current Root %s is not a metadevice\n"), rootnp->cname); - return (METAROOT_ERR); - } -} - -/* - * What we're going to do: - * - * 1) Check if the device is a metadevice or not. - * - * 2) If a metadevice, and it is valid, ie a stripe or a mirror containing - * a single slice, add "forceload:{drv,misc}/" of - * underlying drivers for the meta-root and the metadevice - * database to system. Otherwise, remove forceloads from system if the - * slice is a component of the current root metadevice. - * - * 3) Add "rootdev:/devices/..." to system. - * - * 4) Replace / mount in vfstab. - * - * 5) Repatch database locations, just to be safe. - */ -int -main( - int argc, - char *argv[] -) -{ - int i; - mdsetname_t *sp = NULL; - mdname_t *rootnp; - int c; - int ckmv_flag = 0; /* non-zero if -c, -k, -m or -v */ - md_error_t status = mdnullerror; - md_error_t *ep = &status; - char *miscname; - char *curroot; - mdname_t *currootnp; - mdname_t *currootdevnp; - char *root_path = NULL; - struct def_map *map; - size_t root_path_size; - size_t path_buf_size; - int error; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((sdssc_bind_library() == SDSSC_OKAY) && - (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE)) - exit(error); - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse options */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hnk:m:v:c:R:?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - case 'm': - dbname = optarg; - ckmv_flag = 1; - break; - case 'n': - doit = 0; - verbose = 1; - break; - case 'k': - sname = optarg; - ckmv_flag = 1; - break; - case 'v': - vname = optarg; - ckmv_flag = 1; - break; - case 'c': - cname = optarg; - ckmv_flag = 1; - break; - case 'R': - root_path = optarg; - break; - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - if (argc != 1) - usage(sp, 1); - - /* Can't use -R with any of -c, -k, -m or -v */ - if ((ckmv_flag != 0) && (root_path != NULL)) { - md_eprintf( - gettext("-R invalid with any of -c, -k, -m or -v\n")); - usage(sp, 1); - } - - /* get device name */ - if ((rootnp = metaname(&sp, argv[0], UNKNOWN, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if ((curroot = meta_get_current_root(ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - /* - * Get device name of current root metadevice. If root is net - * mounted as happens if this command is part of the install - * process, currootnp will be set to NULL. - */ - currootnp = metaname(&sp, curroot, UNKNOWN, ep); - /* - * If the argument is the name of the current root filesystem, then - * the command is allowed, otherwise check that the argument is - * valid. - */ - if ((currootnp == NULL) || - (strcmp(currootnp->cname, rootnp->cname) != 0)) { - if (metaismeta(rootnp)) { - /* - * Validate that the metadevice is based on a - * single slice. If none of the -k, -m, -v, -c or - * -R options are specified, then the default - * system files are being modified and hence the - * current root slice must be a component of the - * metadevice. If any of the previously mentioned - * options are used don't check that the current - * root is a component. - */ - if ((ckmv_flag == 0) && (root_path == NULL)) { - /* Get device name of current root slice */ - if ((currootdevnp = - meta_get_current_root_dev(sp, ep)) - == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else currootdevnp = NULL; - - if ((miscname = metagetmiscname(rootnp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - /* Check that metadevice is a mirror or a stripe */ - if (strcmp(miscname, MD_MIRROR) == 0) { - if (validate_mirror_root(sp, rootnp, - currootdevnp, 0, ep) != METAROOT_OK) { - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_STRIPE) == 0) { - if (validate_stripe_root(sp, rootnp, - currootdevnp, 0, ep) != METAROOT_OK) { - md_exit(sp, 1); - } - } else { - md_eprintf(gettext( - "%s is not a mirror or stripe\n"), - rootnp->cname); - md_exit(sp, 1); - } - } else { - /* - * Check that the root device is a component of the - * current root filesystem only if the default system - * files are being modified - */ - if ((ckmv_flag == 0) && (root_path == NULL)) { - if (validate_root_device(sp, rootnp, ep) != 0) { - md_exit(sp, 1); - } - } - } - } - - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* - * If -R is specified, use the default system file names relative - * to the new root location. - */ - if (root_path != NULL) { - root_path_size = strlen(root_path); - for (i = 0, map = default_names; - i < sizeof (default_names) / sizeof (struct def_map); - i++, map++) { - /* Add 1 for null terminator */ - path_buf_size = root_path_size + - strlen(map->dm_default) + 1; - *map->dm_fname = malloc(path_buf_size); - if (*map->dm_fname == NULL) { - md_eprintf(gettext("Cannot allocate memory \ -for system file path relocation\n")); - md_exit(sp, 1); - } - (void) snprintf(*map->dm_fname, path_buf_size, - "%s%s", root_path, map->dm_default); - } - } - - /* patch system and vfstab for root and mddb locations */ - if (meta_patch_rootdev(rootnp, sname, vname, cname, dbname, doit, - verbose, ep) != 0) { - if (root_path != NULL) { - free_mem(); - } - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (root_path != NULL) { - free_mem(); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metaset.c b/usr/src/cmd/lvm/util/metaset.c deleted file mode 100644 index c83455c6a128..000000000000 --- a/usr/src/cmd/lvm/util/metaset.c +++ /dev/null @@ -1,2683 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * Metadevice diskset utility. - */ - -#include -#include -#include - -enum metaset_cmd { - notspecified, - add, - balance, - delete, - cluster, - isowner, - purge, - query, - release, - take, - join, /* Join a multinode diskset */ - withdraw /* Withdraw from a multinode diskset */ -}; - -enum cluster_cmd { - ccnotspecified, - clusterversion, /* Return the version of the cluster I/F */ - clusterdisksin, /* List disks in a given diskset */ - clustertake, /* back door for Cluster take */ - clusterrelease, /* ditto */ - clusterpurge, /* back door for Cluster purge */ - clusterproxy /* proxy the args after '--' to primary */ -}; - -static void -usage( - mdsetname_t *sp, - char *string) -{ - if ((string != NULL) && (*string != '\0')) - md_eprintf("%s\n", string); - (void) fprintf(stderr, gettext( - "usage:\t%s -s setname -a [-A enable | disable] -h hostname ...\n" - " %s -s setname -a [-M] -h hostname ...\n" - " %s -s setname -a [-M] [-l length] [-L] drivename ...\n" - " %s -s setname -d [-M] -h hostname ...\n" - " %s -s setname -d [-M] -f -h all-hostnames\n" - " %s -s setname -d [-M] [-f] drivename ...\n" - " %s -s setname -d [-M] [-f] hostname ...\n" - " %s -s setname -A enable | disable\n" - " %s -s setname -t [-f]\n" - " %s -s setname -r\n" - " %s [-s setname] -j [-M]\n" - " %s [-s setname] -w [-M]\n" - " %s -s setname -P [-M]\n" - " %s -s setname -b [-M]\n" - " %s -s setname -o [-M] [-h hostname]\n" - " %s [-s setname]\n" - "\n" - " hostname = contents of /etc/nodename\n" - " drivename = cNtNdN no slice\n" - " [-M] for multi-owner set is optional except" - " on set creation\n"), - myname, myname, myname, myname, myname, myname, myname, myname, - myname, myname, myname, myname, myname, myname, myname, myname); - md_exit(sp, (string == NULL) ? 0 : 1); -} - -/* - * The svm.sync rc script relies heavily on the metaset output. - * Any changes to the metaset output MUST verify that the rc script - * does not break. Not doing so may potentially leave the system - * unusable. You have been WARNED. - */ -static int -printset(mdsetname_t *sp, md_error_t *ep) -{ - int i, j; - md_set_desc *sd; - md_drive_desc *dd, *p; - int max_meds; - md_mnnode_desc *nd; - - if ((sd = metaget_setdesc(sp, ep)) == NULL) - return (-1); - - /* - * Only get set owner information for traditional diskset. - * This set owner information is stored in the node records - * for a MN diskset. - */ - if (!(MD_MNSET_DESC(sd))) { - if (metaget_setownership(sp, ep) == -1) - return (-1); - } - - if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), - ep)) == NULL) && !mdisok(ep)) - return (-1); - - if (MD_MNSET_DESC(sd)) { - (void) printf(gettext( - "\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"), - sp->setname, sp->setno, sd->sd_mn_master_nodenm); - if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) && - (dd != NULL)) { - (void) printf(gettext( - "Master and owner information unavailable " - "until joined (metaset -j)\n")); - } - } else { - (void) printf(gettext( - "\nSet name = %s, Set number = %d\n"), - sp->setname, sp->setno); - } - - if (MD_MNSET_DESC(sd)) { - (void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"), - gettext("Host"), gettext("Owner"), gettext("Member")); - nd = sd->sd_nodelist; - while (nd) { - /* - * Don't print nodes that aren't ok since they may be - * removed from config during a reconfig cycle. If a - * node was being added to a diskset and the entire - * cluster went down but the node being added was unable - * to reboot, there's no way to know if that node had - * its own node record set to OK or not. So, node - * record is left in ADD state during reconfig cycle. - * When that node reboots and returns to the cluster, - * the reconfig cycle will either remove the node - * record (if not marked OK on that node) or will mark - * it OK on all nodes. - * It is very important to only remove a node record - * from the other nodes when that node record is not - * marked OK on its own node - otherwise, different - * nodes would have different nodelists possibly - * causing different nodes to to choose different - * masters. - * - * Standard hostname field is 17 bytes but metaset - * will display up to MD_MAX_NODENAME, defined in - * meta_basic.h - */ - if (!(nd->nd_flags & MD_MN_NODE_OK)) { - nd = nd->nd_next; - continue; - } - if ((nd->nd_flags & MD_MN_NODE_ALIVE) && - (nd->nd_flags & MD_MN_NODE_OWN)) { - (void) printf( - gettext(" %-17.*s %-12.12s %-4.4s\n"), - MD_MAX_NODENAME, - nd->nd_nodename, gettext("multi-owner"), - gettext("Yes")); - } else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) && - (nd->nd_flags & MD_MN_NODE_OWN)) { - /* Should never be able to happen */ - (void) printf( - gettext(" %-17.*s %-12.12s %-4.4s\n"), - MD_MAX_NODENAME, - nd->nd_nodename, gettext("multi-owner"), - gettext("No")); - } else if ((nd->nd_flags & MD_MN_NODE_ALIVE) && - (!(nd->nd_flags & MD_MN_NODE_OWN))) { - (void) printf( - gettext(" %-17.*s %-12.12s %-4.4s\n"), - MD_MAX_NODENAME, - nd->nd_nodename, gettext(""), - gettext("Yes")); - } else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) && - (!(nd->nd_flags & MD_MN_NODE_OWN))) { - (void) printf( - gettext(" %-17.*s %-12.12s %-4.4s\n"), - MD_MAX_NODENAME, - nd->nd_nodename, gettext(""), - gettext("No")); - } - nd = nd->nd_next; - } - } else { - (void) printf("\n%-19.19s %-5.5s\n", - gettext("Host"), gettext("Owner")); - for (i = 0; i < MD_MAXSIDES; i++) { - /* Skip empty slots */ - if (sd->sd_nodes[i][0] == '\0') - continue; - - /* - * Standard hostname field is 17 bytes but metaset will - * display up to MD_MAX_NODENAME, def in meta_basic.h - */ - (void) printf(" %-17.*s %s\n", MD_MAX_NODENAME, - sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ? - (sd->sd_isown[i] ? gettext("Yes (auto)") : - gettext("No (auto)")) - : (sd->sd_isown[i] ? gettext("Yes") : ""))); - } - } - - if (sd->sd_med.n_cnt > 0) - (void) printf("\n%-19.19s %-7.7s\n", - gettext("Mediator Host(s)"), gettext("Aliases")); - - if ((max_meds = get_max_meds(ep)) == 0) - return (-1); - - for (i = 0; i < max_meds; i++) { - if (sd->sd_med.n_lst[i].a_cnt == 0) - continue; - /* - * Standard hostname field is 17 bytes but metaset will - * display up to MD_MAX_NODENAME, def in meta_basic.h - */ - (void) printf(" %-17.*s ", MD_MAX_NODENAME, - sd->sd_med.n_lst[i].a_nm[0]); - for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) { - (void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]); - if (sd->sd_med.n_lst[i].a_cnt - j > 1) - (void) printf(gettext(", ")); - } - (void) printf("\n"); - } - - if (dd) { - int len = 0; - - - /* - * Building a format string on the fly that will - * be used in (f)printf. This allows the length - * of the ctd to vary from small to large without - * looking horrible. - */ - for (p = dd; p != NULL; p = p->dd_next) - len = max(len, strlen(p->dd_dnp->cname)); - - len += 2; - (void) printf("\n%-*.*s %-5.5s\n", len, len, - gettext("Drive"), - gettext("Dbase")); - for (p = dd; p != NULL; p = p->dd_next) { - (void) printf("\n%-*.*s %-5.5s\n", len, len, - p->dd_dnp->cname, - (p->dd_dbcnt ? gettext("Yes") : - gettext("No"))); - } - } - - return (0); -} - -static int -printsets(mdsetname_t *sp, md_error_t *ep) -{ - int i; - mdsetname_t *sp1; - set_t max_sets; - - /* - * print setname given. - */ - if (! metaislocalset(sp)) { - if (printset(sp, ep)) - return (-1); - return (0); - } - - if ((max_sets = get_max_sets(ep)) == 0) - return (-1); - - /* - * Print all known sets - */ - for (i = 1; i < max_sets; i++) { - if ((sp1 = metasetnosetname(i, ep)) == NULL) { - if (! mdiserror(ep, MDE_NO_SET)) - break; - mdclrerror(ep); - continue; - } - - if (printset(sp1, ep)) - break; - } - if (! mdisok(ep)) - return (-1); - - return (0); -} - -/* - * Print the current versionn of the cluster contract private interface. - */ -static void -printclusterversion() -{ - (void) printf("%s\n", METASETIFVERSION); -} - -/* - * Print the disks that make up the given disk set. This is used - * exclusively by Sun Cluster and is contract private. - * Should never be called with sname of a Multinode diskset. - */ -static int -printdisksin(char *sname, md_error_t *ep) -{ - mdsetname_t *sp; - md_drive_desc *dd, *p; - - if ((sp = metasetname(sname, ep)) == NULL) { - - /* - * During a deletion of a set the associated service is - * put offline. The SC3.0 reservation code calls disksuite - * to find a list of disks associated with the set so that - * it can release the reservation on those disks. In this - * case there won't be any disks or even a set left. So just - * return. - */ - return (0); - } - - if (metaget_setownership(sp, ep) == -1) - return (-1); - - if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), - ep)) == NULL) && !mdisok(ep)) - return (-1); - - for (p = dd; p != NULL; p = p->dd_next) - (void) printf("%s\n", p->dd_dnp->rname); - - return (0); -} - -static void -parse_printset(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:")) != -1) { - switch (c) { - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc != 0) - usage(sp, gettext("too many args")); - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - md_exit(sp, 0); -} - -static void -parse_add(int argc, char **argv) -{ - int c, created_set; - int hosts = FALSE; - int meds = FALSE; - int auto_take = FALSE; - int force_label = FALSE; - int default_size = TRUE; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mddrivenamelist_t *dnlp = NULL; - mddrivenamelist_t *p; - daddr_t dbsize, nblks; - mdsetname_t *local_sp = NULL; - int multi_node = 0; - md_set_desc *sd; - rval_e sdssc_rval; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) { - switch (c) { - case 'M': - multi_node = 1; - break; - case 'A': - /* verified sub-option in main */ - if (strcmp(optarg, "enable") == 0) - auto_take = TRUE; - break; - case 'a': - break; - case 'h': - case 'm': - if (meds == TRUE || hosts == TRUE) - usage(sp, gettext( - "only one -m or -h option allowed")); - - if (default_size == FALSE || force_label == TRUE) - usage(sp, gettext( - "conflicting options")); - - if (c == 'h') - hosts = TRUE; - else - meds = TRUE; - break; - case 'l': - if (hosts == TRUE || meds == TRUE) - usage(sp, gettext( - "conflicting options")); - if (sscanf(optarg, "%ld", &dbsize) != 1) { - md_eprintf(gettext( - "%s: bad format\n"), optarg); - usage(sp, ""); - } - - default_size = FALSE; - break; - case 'L': - /* Same criteria as -l */ - if (hosts == TRUE || meds == TRUE) - usage(sp, gettext( - "conflicting options")); - force_label = TRUE; - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext( - "unknown options")); - } - } - - /* Can only use -A enable when creating the single-node set */ - if (auto_take && hosts != TRUE) - usage(sp, gettext("conflicting options")); - - argc -= optind; - argv += optind; - - /* - * Add hosts - */ - if (hosts == TRUE) { - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* - * Keep track of Cluster set creation. Need to complete - * the transaction no matter if the set was created or not. - */ - created_set = 0; - - /* - * Have no set, cannot take the lock, so only take the - * local lock. - */ - if ((sp = metasetname(sname, ep)) == NULL) { - sdssc_rval = 0; - if (multi_node) { - /* - * When running on a cluster system that - * does not support MN disksets, the routine - * sdssc_mo_create_begin will be bound - * to the SVM routine not_bound_error - * which returns SDSSC_NOT_BOUND_ERROR. - * - * When running on a cluster system that - * does support MN disksets, the routine - * sdssc_mo_create_begin will be bound to - * the sdssc_mo_create_begin routine in - * library libsdssc_so. A call to - * sdssc_mo_create_begin will return with - * either SDSSC_ERROR or SDSSC_OKAY. If - * an SDSSC_OKAY is returned, then the - * cluster framework has allocated a - * set number for this new set that is unique - * across traditional and MN disksets. - * Libmeta will get this unique set number - * by calling sdssc_get_index. - * - * When running on a non-cluster system, - * the routine sdssc_mo_create_begin - * will be bound to the SVM routine - * not_bound which returns SDSSC_NOT_BOUND. - * In this case, all sdssc routines will - * return SDSSC_NOT_BOUND. No need to check - * for return value of SDSSC_NOT_BOUND since - * the libmeta call to get the set number - * (sdssc_get_index) will also fail with - * SDSSC_NOT_BOUND causing libmeta to - * determine its own set number. - */ - sdssc_rval = sdssc_mo_create_begin(sname, argc, - argv, SDSSC_PICK_SETNO); - if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) { - (void) mderror(ep, MDE_NOT_MN, NULL); - mde_perror(ep, - "Cluster node does not support " - "multi-owner diskset operations"); - md_exit(local_sp, 1); - } else if (sdssc_rval == SDSSC_ERROR) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } else { - sdssc_rval = sdssc_create_begin(sname, argc, - argv, SDSSC_PICK_SETNO); - if (sdssc_rval == SDSSC_ERROR) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - /* - * Created diskset (as opposed to adding a - * host to an existing diskset). - */ - created_set = 1; - - sp = Zalloc(sizeof (*sp)); - sp->setname = Strdup(sname); - sp->lockfd = MD_NO_LOCK; - mdclrerror(ep); - } else { - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - if (MD_MNSET_DESC(sd)) { - multi_node = 1; - } - - /* - * can't add hosts to an existing set & enable - * auto-take - */ - if (auto_take) - usage(sp, gettext("conflicting options")); - - /* - * Have a valid set, take the set lock also. - * - * A MN diskset does not use the set meta_lock but - * instead uses the clnt_lock of rpc.metad and the - * suspend/resume feature of the rpc.mdcommd. Can't - * use set meta_lock since class 1 messages are - * grabbing this lock and if this thread is holding - * the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if (!multi_node) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - } - - if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take, - ep)) { - if (created_set) - sdssc_create_end(sname, SDSSC_CLEANUP); - mde_perror(&status, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (created_set) - sdssc_create_end(sname, SDSSC_COMMIT); - - else { - /* - * If adding hosts to existing diskset, - * call DCS svcs - */ - sdssc_add_hosts(sname, argc, argv); - } - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); - } - - /* - * Add mediators - */ - if (meds == TRUE) { - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - if (MD_MNSET_DESC(sd)) { - multi_node = 1; - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - /* - * A MN diskset does not use the set meta_lock but - * instead uses the clnt_lock of rpc.metad and the - * suspend/resume feature of the rpc.mdcommd. Can't - * use set meta_lock since class 1 messages are - * grabbing this lock and if this thread is holding - * the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if (!multi_node) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - - if (meta_set_addmeds(sp, argc, argv, ep)) { - mde_perror(&status, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); - } - - /* - * Add drives - */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* Determine if diskset is a MN diskset or not */ - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - if (MD_MNSET_DESC(sd)) { - multi_node = 1; - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* Make sure database size is within limits */ - if (default_size == FALSE) { - if ((multi_node && dbsize < MDDB_MN_MINBLKS) || - (!multi_node && dbsize < MDDB_MINBLKS)) - usage(sp, gettext( - "size (-l) is too small")); - - if ((multi_node && dbsize > MDDB_MN_MAXBLKS) || - (!multi_node && dbsize > MDDB_MAXBLKS)) - usage(sp, gettext( - "size (-l) is too big")); - } - - /* - * Have a valid set, take the set lock also. - * - * A MN diskset does not use the set meta_lock but - * instead uses the clnt_lock of rpc.metad and the - * suspend/resume feature of the rpc.mdcommd. Can't - * use set meta_lock since class 1 messages are - * grabbing this lock and if this thread is holding - * the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if (!multi_node) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - - - /* - * If using the default size, - * then let's adjust the default to the minimum - * size currently in use. - */ - if (default_size) { - dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE; - if ((nblks = meta_db_minreplica(sp, ep)) < 0) - mdclrerror(ep); - else - dbsize = nblks; /* adjust replica size */ - } - - if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) { - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (c == 0) { - md_perror(gettext( - "No drives specified to add.\n")); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) { - metafreedrivenamelist(dnlp); - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - /* - * MN disksets don't have a device id in the master block - * For traditional disksets, check for the drive device - * id not fitting in the master block - */ - if (!multi_node) { - for (p = dnlp; p != NULL; p = p->next) { - int fd; - ddi_devid_t devid; - mdname_t *np; - - np = metaslicename(p->drivenamep, 0, ep); - if (np == NULL) - continue; - - if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0) - continue; - - if (devid_get(fd, &devid) == 0) { - size_t len; - - len = devid_sizeof(devid); - if (len > (DEV_BSIZE - sizeof (mddb_mb_t))) - (void) mddserror(ep, - MDE_DS_NOTSELFIDENTIFY, NULL, NULL, - np->rname, NULL); - devid_free(devid); - } else { - (void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY, - NULL, NULL, np->rname, NULL); - } - (void) close(fd); - } - } - - /* - * MN disksets don't use DCS clustering services. - * For traditional disksets: - * There's not really much we can do here if this call fails. - * The drives have been added to the set and DiskSuite believes - * it owns the drives. - * Relase the set and hope for the best. - */ - if ((!multi_node) && - (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) { - (void) meta_set_release(sp, ep); - (void) printf(gettext( - "Sun Clustering failed to make set primary\n")); - } - - metafreedrivenamelist(dnlp); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); -} - -static void -parse_balance(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_set_desc *sd; - int multi_node = 0; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Mbs:")) != -1) { - switch (c) { - case 'M': - break; - case 'b': - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc != 0) - usage(sp, gettext("too many args")); - - if ((sp = metasetname(sname, &status)) == NULL) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - if ((sd = metaget_setdesc(sp, &status)) == NULL) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - if (MD_MNSET_DESC(sd)) { - multi_node = 1; - } - /* - * Have a valid set, take the set lock also. - * - * A MN diskset does not use the set meta_lock but - * instead uses the clnt_lock of rpc.metad and the - * suspend/resume feature of the rpc.mdcommd. Can't - * use set meta_lock since class 1 messages are - * grabbing this lock and if this thread is holding - * the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if (!multi_node) { - if (meta_lock(sp, TRUE, &status) != 0) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - } - - if (meta_set_balance(sp, &status) != 0) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - md_exit(sp, 0); -} - -static void -parse_autotake(int argc, char **argv) -{ - int c; - int enable = 0; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "A:s:")) != -1) { - switch (c) { - case 'A': - /* verified sub-option in main */ - if (strcmp(optarg, "enable") == 0) - enable = 1; - break; - case 's': - /* verified presence of setname in main */ - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_set_auto_take(sp, enable, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - md_exit(sp, 0); -} - -static void -parse_del(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - int hosts = FALSE; - int meds = FALSE; - int forceflg = FALSE; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mddrivenamelist_t *dnlp = NULL; - mdsetname_t *local_sp = NULL; - md_set_desc *sd; - int multi_node = 0; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Mdfhms:")) != -1) { - switch (c) { - case 'M': - break; - case 'd': - break; - case 'f': - forceflg = TRUE; - break; - case 'h': - case 'm': - if (meds == TRUE || hosts == TRUE) - usage(sp, gettext( - "only one -m or -h option allowed")); - - if (c == 'h') - hosts = TRUE; - else - meds = TRUE; - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - if (MD_MNSET_DESC(sd)) - multi_node = 1; - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* - * Have a valid set, take the set lock also. - * - * A MN diskset does not use the set meta_lock but - * instead uses the clnt_lock of rpc.metad and the - * suspend/resume feature of the rpc.mdcommd. Can't - * use set meta_lock since class 1 messages are - * grabbing this lock and if this thread is holding - * the set meta_lock then no rpc.mdcommd suspend - * can occur. - */ - if (!multi_node) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - - /* - * Delete hosts - */ - if (hosts == TRUE) { - if (meta_check_ownership(sp, ep) != 0) { - /* - * If we don't own the set bail out here otherwise - * we could delete the node from the DCS service - * yet not delete the host from the set. - */ - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) { - if (!metad_isautotakebyname(sname)) { - /* - * SC could have been installed after the set - * was created. We still want to be able to - * delete these sets. - */ - md_perror(gettext( - "Failed to delete hosts from DCS service")); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - } - if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) { - if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) { - (void) printf(gettext( - "Failed to restore host(s) in DCS " - "database\n")); - } - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); - } - - /* - * Delete mediators - */ - if (meds == TRUE) { - if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) { - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); - } - - /* - * Delete drives - */ - - if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) { - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (c == 0) { - md_perror(gettext( - "No drives specified to delete.\n")); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) { - metafreedrivenamelist(dnlp); - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - - metafreedrivenamelist(dnlp); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); -} - -static void -parse_isowner(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - char *host = NULL; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Moh:s:")) != -1) { - switch (c) { - case 'M': - break; - case 'o': - break; - case 'h': - if (host != NULL) { - usage(sp, gettext( - "only one -h option allowed")); - } - host = optarg; - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc != 0) - usage(sp, gettext("too many args")); - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (host == NULL) { - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else { - if (meta_check_ownership_on_host(sp, host, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - md_exit(sp, 0); -} - -static void -parse_purge(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - mdsetname_t *local_sp = NULL; - md_drive_desc *dd; - char *sname = MD_LOCAL_NAME; - char *thishost = mynode(); - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int bypass_cluster_purge = 0; - int forceflg = FALSE; - int ret = 0; - int multi_node = 0; - md_set_desc *sd; - - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "C:fPs:")) != -1) { - switch (c) { - case 'M': - break; - case 'C': - bypass_cluster_purge = 1; - break; - case 'f': - forceflg = TRUE; - break; - case 'P': - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc != 0) - usage(sp, gettext("too many arguments")); - - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - if (MD_MNSET_DESC(sd)) - multi_node = 1; - - if (!multi_node) { - if (meta_lock(sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - } - - /* Must not own the set if purging it from this host */ - if (meta_check_ownership(sp, ep) == 0) { - /* - * Need to see if there are disks in the set, if not then - * there is no ownership but meta_check_ownership returns 0 - */ - dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep); - if (!mdisok(ep)) { - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - if (dd != NULL) { - (void) printf(gettext - ("Must not be owner of the set when purging it\n")); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - } - /* - * Remove the node from the DCS service - */ - if (!bypass_cluster_purge) { - if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) { - md_perror(gettext - ("Failed to purge hosts from DCS service")); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 1); - } - } - - if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg, - ep)) != 0) { - if (!bypass_cluster_purge) { - if (sdssc_add_hosts(sname, 1, &thishost) == - SDSSC_ERROR) { - (void) printf(gettext( - "Failed to restore host in DCS " - "database\n")); - } - } - mde_perror(ep, ""); - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, ret); - } - - if (!multi_node) - (void) meta_unlock(sp, ep); - md_exit(local_sp, 0); -} - -static void -parse_query(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - mddb_dtag_lst_t *dtlp = NULL; - mddb_dtag_lst_t *tdtlp; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Mqs:")) != -1) { - switch (c) { - case 'M': - break; - case 'q': - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc != 0) - usage(sp, gettext("too many args")); - - if ((sp = metasetname(sname, &status)) == NULL) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - - if (meta_lock(sp, TRUE, &status) != 0) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - - if (meta_set_query(sp, &dtlp, &status) != 0) { - mde_perror(&status, ""); - md_exit(sp, 1); - } - - if (dtlp != NULL) - (void) printf("The following tag(s) were found:\n"); - - for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) { - dtlp = tdtlp->dtl_nx; - (void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id, - tdtlp->dtl_dt.dt_hn, - ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec)); - Free(tdtlp); - } - - md_exit(sp, 0); -} - -/* Should never be called with sname of a Multinode diskset. */ -static void -parse_releaseset(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - char *sname = MD_LOCAL_NAME; - sdssc_boolean_e cluster_release = SDSSC_False; - sdssc_version_t vers; - rval_e rval; - md_set_desc *sd; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "C:s:r")) != -1) { - switch (c) { - case 'C': - cluster_release = SDSSC_True; - break; - case 's': - sname = optarg; - break; - case 'r': - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc > 0) - usage(sp, gettext("too many args")); - - (void) memset(&vers, 0, sizeof (vers)); - - if ((sdssc_version(&vers) == SDSSC_OKAY) && - (vers.major == 3) && - (cluster_release == SDSSC_False)) { - - /* - * If the release is being done by the user via the CLI - * we need to notify the DCS to release this node as being - * the primary. The reason nothing else needs to be done - * is due to the fact that the reservation code will exec - * metaset -C release to complete the operation. - */ - rval = sdssc_notify_service(sname, Release_Primary); - if (rval == SDSSC_ERROR) { - (void) printf(gettext( - "metaset: failed to notify DCS of release\n")); - } - md_exit(NULL, rval == SDSSC_ERROR); - } - - if ((sp = metasetname(sname, ep)) == NULL) { - - /* - * It's entirely possible for the SC3.0 reservation code - * to call for DiskSet to release a diskset and have that - * diskset not exist. During a diskset removal DiskSuite - * maybe able to remove all traces of the diskset before - * the reservation code execs metaset -C release in which - * case the metasetname will fail, but the overall command - * shouldn't. - */ - if (vers.major == 3) - md_exit(sp, 0); - else { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (sd->sd_flags & MD_SR_AUTO_TAKE) { - md_eprintf(gettext("cannot release auto-take diskset\n")); - md_exit(sp, 1); - } - - if (meta_lock_nowait(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 10); /* special errcode */ - } - - if (meta_set_release(sp, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - md_exit(sp, 0); -} - -/* Should never be called with sname of a Multinode diskset. */ -static void -parse_takeset(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL; - int flags = 0; - char *sname = MD_LOCAL_NAME; - mhd_mhiargs_t mhiargs; - char *cp = NULL; - int pos = -1; /* position of timeout value */ - int usetag = 0; - static char *nullopts[] = { NULL }; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - sdssc_boolean_e cluster_take = SDSSC_False; - sdssc_version_t vers; - rval_e rval; - int set_take_rval; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) { - switch (c) { - case 'C': - cluster_take = SDSSC_True; - break; - case 'f': - flags |= TAKE_FORCE; - break; - case 's': - sname = optarg; - break; - case 't': - break; - case 'u': - usetag = atoi(optarg); - flags |= TAKE_USETAG; - break; - case 'y': - flags |= TAKE_USEIT; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - mhiargs = defmhiargs; - - argc -= optind; - argv += optind; - - if (argc > 1) - usage(sp, gettext("too many args")); - - /* - * If we have a list of timeout value overrides, handle it here - */ - while (argv[0] != NULL && *argv[0] != '\0') { - /* - * The use of the nullopts[] "token list" here is to make - * getsubopts() simply parse a comma separated list - * returning either "" or the contents of the field, the - * end condition is exaustion of the initial string, which - * is modified in the process. - */ - (void) getsubopt(&argv[0], nullopts, &cp); - - c = 0; /* re-use c as temp value of timeout */ - - if (*cp != '-') /* '-' uses default */ - c = atoi(cp); - - if (c < 0) { - usage(sp, gettext( - "time out values must be > 0")); - } - - if (++pos > 3) { - usage(sp, gettext( - "too many timeout values specified.")); - } - - if (c == 0) /* 0 or "" field uses default */ - continue; - - /* - * Assign temp value to appropriate structure member based on - * its position in the comma separated list. - */ - switch (pos) { - case 0: - mhiargs.mh_ff = c; - break; - - case 1: - mhiargs.mh_tk.reinstate_resv_delay = c; - break; - - case 2: - mhiargs.mh_tk.min_ownership_delay = c; - break; - - case 3: - mhiargs.mh_tk.max_ownership_delay = c; - break; - } - } - - (void) memset(&vers, 0, sizeof (vers)); - - if ((sdssc_version(&vers) == SDSSC_OKAY) && - (vers.major == 3) && - (cluster_take == SDSSC_False)) { - - /* - * If the take is beging done by the user via the CLI we need - * to notify the DCS to make this current node the primary. - * The SC3.0 reservation code will in turn exec metaset with - * the -C take arg to complete this operation. - */ - if ((rval = sdssc_notify_service(sname, Make_Primary)) == - SDSSC_ERROR) { - (void) printf(gettext( - "metaset: failed to notify DCS of take\n")); - } - md_exit(NULL, rval == SDSSC_ERROR); - } - - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) { - - /* - * If we're running in a cluster environment and this - * node already owns the set. Don't bother trying to - * take the set again. There's one case where an adminstrator - * is adding disks to a set for the first time. metaset - * will take the ownership of the set at that point. During - * that add operation SC3.0 notices activity on the device - * and also tries to perform a take operation. The SC3.0 take - * will fail because the adminstrative add has the set locked - */ - md_exit(sp, 0); - } - - if (meta_lock_nowait(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 10); /* special errcode */ - } - - /* - * If a 2 is returned from meta_set_take, this take was able to resolve - * an unresolved replicated disk (i.e. a disk is now available that - * had been missing during the import of the replicated diskset). - * Need to release the diskset and re-take in order to have - * the subdrivers re-snarf using the newly resolved (or newly mapped) - * devids. This also allows the namespace to be updated with the - * correct major names in the case where the disk being replicated - * was handled by a different driver than the replicated disk. - */ - set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status); - if (set_take_rval == 2) { - if (meta_set_release(sp, &status)) { - mde_perror(&status, - "Need to release and take set to resolve names."); - md_exit(sp, 1); - } - metaflushdrivenames(); - metaflushsetname(sp); - set_take_rval = meta_set_take(sp, &mhiargs, - (flags | TAKE_RETAKE), usetag, &status); - } - - if (set_take_rval == -1) { - mde_perror(&status, ""); - if (mdismddberror(&status, MDE_DB_TAGDATA)) - md_exit(sp, 2); - if (mdismddberror(&status, MDE_DB_ACCOK)) - md_exit(sp, 3); - if (mdismddberror(&status, MDE_DB_STALE)) - md_exit(sp, 66); - md_exit(sp, 1); - } - md_exit(sp, 0); -} - -/* - * Joins a node to a specific set or to all multinode disksets known - * by this node. If set is specified then caller should have verified - * that the set is a multinode diskset. - * - * If an error occurs, metaset exits with a 1. - * If there is no error, metaset exits with a 0. - */ -static void -parse_joinset(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL, *local_sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - md_set_desc *sd; - char buf[BUFSIZ]; - char *p = buf; - set_t max_sets, setno; - int err, cumm_err = 0; - size_t bufsz; - - bufsz = sizeof (buf); - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Ms:j")) != -1) { - switch (c) { - case 'M': - break; - case 'j': - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc > 1) - usage(sp, gettext("too many args")); - - /* - * If no setname option was used, then join all disksets - * that this node knows about. Attempt to join all - * disksets that this node knows about. - * - * Additional text is added to the error messages during - * this section of code in order to help the user understand - * why the 'join of all sets' failed and which set caused - * the failure. - */ - - /* - * Hold local set lock throughout this call to keep - * other actions from interfering (such as creating a new - * set, etc.). - */ - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if (strcmp(sname, MD_LOCAL_NAME) == 0) { - /* - * If no set name is given, then walk through all sets - * on this node which could include: - * - MN disksets - * - traditional disksets - * - non-existent disksets - * Attempt to join the MN disksets. - * If the join of one set fails, print out an error message - * about that set and continue the walk. - */ - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* Start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - (void) sprintf(p, gettext( - "Unable to get set %d information"), - setno); - mde_perror(ep, p); - cumm_err = 1; - mdclrerror(ep); - continue; - } - } - - /* If setname is there, set desc should exist. */ - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - (void) snprintf(p, bufsz, gettext( - "Unable to get set %s desc information"), - sp->setname); - mde_perror(ep, p); - cumm_err = 1; - mdclrerror(ep); - continue; - } - - /* Only check MN disksets */ - if (!MD_MNSET_DESC(sd)) { - continue; - } - - /* - * Return value of 0 is success. - * Return value of -1 means a failure. - * Return value of -2 means set could not be - * joined, but shouldn't cause an error. - * Reasons would be: - * - no drives in set - * - node already joined to set - * Return value of -3 means joined stale set. - * Can't check for all reasons here - * since set isn't locked yet across all - * nodes in the cluster. The call - * to libmeta routine, meta_set_join, will - * lock across the cluster and perform - * the checks. - */ - if ((err = meta_set_join(sp, ep)) == -1) { - /* Print error of diskset join failure */ - (void) snprintf(p, bufsz, - gettext("Join to diskset %s failed"), - sp->setname); - mde_perror(ep, p); - cumm_err = 1; - mdclrerror(ep); - continue; - } - - if (err == -3) { - /* Print error of diskset join failure */ - (void) snprintf(p, bufsz, - gettext("Joined to stale diskset %s"), - sp->setname); - mde_perror(ep, p); - mdclrerror(ep); - } - - mdclrerror(ep); - } - - md_exit(local_sp, cumm_err); - } - - /* - * Code for a specific set is much simpler. - * Error messages don't need extra text since specific setname - * was used. - * Don't need to lock the local set, just the specific set given. - */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* - * Fail command if meta_set_join returns -1. - * - * Return of 0 means that node joined set. - * - * Return of -2 means that node was unable to - * join a set since that set had no drives - * or that had already joined the set. No - * need to fail the command for these reasons. - * - * Return of -3 means that set is stale. - * Return a value of 66 to historically match traditional disksets. - */ - if ((err = meta_set_join(sp, ep)) == -1) { - mde_perror(&status, ""); - md_exit(local_sp, 1); - } - - if (err == -3) { - /* Print error of diskset join failure */ - (void) snprintf(p, bufsz, - gettext("Joined to stale diskset %s"), - sp->setname); - mde_perror(&status, ""); - md_exit(local_sp, 66); - } - - md_exit(local_sp, 0); -} - -/* - * Withdraws a node from a specific set or from all multinode disksets known - * by this node. If set is specified then caller should have verified - * that the set is a multinode diskset. - * - * If an error occurs, metaset exits with a 1. - * If there is no error, metaset exits with a 0. - */ -static void -parse_withdrawset(int argc, char **argv) -{ - int c; - mdsetname_t *sp = NULL, *local_sp = NULL; - char *sname = MD_LOCAL_NAME; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - char buf[BUFSIZ]; - char *p = buf; - md_set_desc *sd; - set_t max_sets, setno; - int err, cumm_err = 0; - size_t bufsz; - - bufsz = sizeof (buf); - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "Ms:w")) != -1) { - switch (c) { - case 'M': - break; - case 'w': - break; - case 's': - sname = optarg; - break; - default: - usage(sp, gettext("unknown options")); - } - } - - argc -= optind; - argv += optind; - - if (argc > 1) - usage(sp, gettext("too many args")); - - /* - * If no setname option was used, then withdraw from all disksets - * that this node knows about. - * - * Additional text is added to the error messages during - * this section of code in order to help the user understand - * why the 'withdraw from all sets' failed and which set caused - * the failure. - */ - - /* - * Hold local set lock throughout this call to keep - * other actions from interfering (such as creating a new - * set, etc.). - */ - if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_lock(local_sp, TRUE, ep) != 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - if (strcmp(sname, MD_LOCAL_NAME) == 0) { - /* - * If no set name is given, then walk through all sets - * on this node which could include: - * - MN disksets - * - traditional disksets - * - non-existent disksets - * Attempt to withdraw from the MN disksets. - * If the withdraw of one set fails, print out an error - * message about that set and continue the walk. - */ - if ((max_sets = get_max_sets(ep)) == 0) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* Start walking through all possible disksets */ - for (setno = 1; setno < max_sets; setno++) { - if ((sp = metasetnosetname(setno, ep)) == NULL) { - if (mdiserror(ep, MDE_NO_SET)) { - /* No set for this setno - continue */ - mdclrerror(ep); - continue; - } else { - (void) sprintf(p, gettext( - "Unable to get set %d information"), - setno); - mde_perror(ep, p); - cumm_err = 1; - mdclrerror(ep); - continue; - } - } - - /* If setname is there, set desc should exist. */ - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - (void) snprintf(p, bufsz, gettext( - "Unable to get set %s desc information"), - sp->setname); - mde_perror(ep, p); - cumm_err = 1; - mdclrerror(ep); - continue; - } - - /* Only check MN disksets */ - if (!MD_MNSET_DESC(sd)) { - continue; - } - - /* - * Return value of 0 is success. - * Return value of -1 means a failure. - * Return value of -2 means set could not be - * withdrawn from, but this shouldn't cause - * an error. Reasons would be: - * - no drives in set - * - node already withdrawn from set - * Can't check for all reasons here - * since set isn't locked yet across all - * nodes in the cluster. The call - * to libmeta routine, meta_set_withdraw, will - * lock across the cluster and perform - * the checks. - */ - if ((err = meta_set_withdraw(sp, ep)) == -1) { - /* Print error of diskset withdraw failure */ - (void) snprintf(p, bufsz, - gettext("Withdraw from diskset %s failed"), - sp->setname); - mde_perror(ep, p); - mdclrerror(ep); - cumm_err = 1; - continue; - } - - if (err == -2) { - mdclrerror(ep); - continue; - } - - mdclrerror(ep); - } - md_exit(local_sp, cumm_err); - } - - - /* - * Code for a specific set is much simpler. - * Error messages don't need extra text since specific setname - * was used. - * Don't need to lock the local set, just the specific set given. - */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(local_sp, 1); - } - - /* - * Fail command if meta_set_withdraw returns -1. - * - * Return of 0 means that node withdrew from set. - * - * Return of -2 means that node was unable to - * withdraw from a set since that set had no drives - * or node was not joined to set. No - * need to fail the command for these reasons. - */ - if (meta_set_withdraw(sp, ep) == -1) { - mde_perror(&status, ""); - md_exit(local_sp, 1); - } - - md_exit(local_sp, 0); -} - -static void -parse_cluster(int argc, char **argv, int multi_node) -{ - int c, error, new_argc, x; - enum cluster_cmd cmd = ccnotspecified; - char *hostname = SDSSC_PROXY_PRIMARY; - char *argument = NULL; - char *sname = MD_LOCAL_NAME; - char primary_node[SDSSC_NODE_NAME_LEN]; - char **new_argv = NULL; - char **np = NULL; - mdsetname_t *sp = NULL; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) { - switch (c) { - case 'C': - if (cmd != ccnotspecified) { - md_exit(sp, -1); - } - argument = optarg; - - if (strcmp(argument, "disksin") == 0) { - cmd = clusterdisksin; - } else if (strcmp(argument, "version") == 0) { - cmd = clusterversion; - } else if (strcmp(argument, "release") == 0) { - cmd = clusterrelease; - } else if (strcmp(argument, "take") == 0) { - cmd = clustertake; - } else if (strcmp(argument, "proxy") == 0) { - cmd = clusterproxy; - } else if (strcmp(argument, "purge") == 0) { - cmd = clusterpurge; - } else { - md_exit(sp, -1); - } - - break; - - case 'h': - hostname = optarg; - break; - - case 's': - sname = optarg; - break; - - case 'f': - case 't': - case 'u': - case 'y': - case 'r': - break; - - default: - md_exit(sp, -1); - } - } - - /* Now call the appropriate command function. */ - switch (cmd) { - case clusterversion: - printclusterversion(); - break; - - case clusterdisksin: - if (printdisksin(sname, ep)) { - md_exit(sp, -1); - } - break; - - case clusterrelease: - if (multi_node) { - usage(sp, gettext( - "-C release is not allowed on multi-owner" - " disksets")); - } - parse_releaseset(argc, argv); - break; - - case clustertake: - if (multi_node) { - usage(sp, gettext( - "-C take is not allowed on multi-owner disksets")); - } - parse_takeset(argc, argv); - break; - - case clusterproxy: - if (multi_node) { - usage(sp, gettext( - "-C proxy is not allowed on multi-owner disksets")); - } - - if ((new_argv = calloc(argc, sizeof (char *))) == NULL) { - (void) printf(gettext("Out of memory\n")); - md_exit(sp, 1); - } - - np = new_argv; - new_argc = 0; - (void) memset(primary_node, '\0', SDSSC_NODE_NAME_LEN); - - for (x = 0; x < argc; x++) { - if (strcmp(argv[x], "-C") == 0) { - - /* - * Need to skip the '-C proxy' args so - * just increase x by one and the work is - * done. - */ - x++; - } else { - *np++ = strdup(argv[x]); - new_argc++; - } - } - - switch (sdssc_get_primary_host(sname, primary_node, - SDSSC_NODE_NAME_LEN)) { - case SDSSC_ERROR: - md_exit(sp, 1); - break; - - case SDSSC_NO_SERVICE: - if (hostname != SDSSC_PROXY_PRIMARY) { - (void) strlcpy(primary_node, hostname, - SDSSC_NODE_NAME_LEN); - } - break; - } - - if (sdssc_cmd_proxy(new_argc, new_argv, - primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY : - primary_node, &error) == SDSSC_PROXY_DONE) { - md_exit(sp, error); - } else { - (void) printf(gettext( - "Couldn't proxy command\n")); - md_exit(sp, 1); - } - break; - - case clusterpurge: - parse_purge(argc, argv); - break; - - default: - break; - } - - md_exit(sp, 0); -} - -/* - * parse args and do it - */ -int -main(int argc, char *argv[]) -{ - enum metaset_cmd cmd = notspecified; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - mdsetname_t *sp = NULL; - char *hostname = SDSSC_PROXY_PRIMARY; - char *sname = MD_LOCAL_NAME; - char *auto_take_option = NULL; - char primary_node[SDSSC_NODE_NAME_LEN]; - int error, c, stat; - int auto_take = FALSE; - md_set_desc *sd; - int mflag = 0; - int multi_node = 0; - rval_e sdssc_res; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - sdssc_res = sdssc_bind_library(); - if (sdssc_res == SDSSC_ERROR) { - (void) printf(gettext( - "%s: Interface error with libsds_sc.so\n"), argv[0]); - exit(1); - } - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - optind = 1; - opterr = 1; - - /* - * NOTE: The "C" option is strictly for cluster use. it is not - * and should not be documented for the customer. - JST - */ - while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?")) - != -1) { - switch (c) { - case 'M': - mflag = 1; - break; - case 'A': - auto_take = TRUE; - if (optarg == NULL || !(strcmp(optarg, "enable") == 0 || - strcmp(optarg, "disable") == 0)) - usage(sp, gettext( - "-A: enable or disable must be specified")); - auto_take_option = optarg; - break; - case 'a': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = add; - break; - case 'b': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = balance; - break; - case 'd': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = delete; - break; - case 'C': /* cluster commands */ - if (cmd != notspecified) { - md_exit(sp, -1); /* conflicting options */ - } - cmd = cluster; - break; - case 'f': - break; - case 'h': - hostname = optarg; - break; - case 'j': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = join; - break; - case 'l': - break; - case 'L': - break; - case 'm': - break; - case 'o': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = isowner; - break; - case 'P': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = purge; - break; - case 'q': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = query; - break; - case 'r': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = release; - break; - case 's': - sname = optarg; - break; - case 't': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = take; - break; - case 'u': - break; - case 'w': - if (cmd != notspecified) { - usage(sp, gettext( - "conflicting options")); - } - cmd = withdraw; - break; - case 'y': - break; - case '?': - if (optopt == '?') - usage(sp, NULL); - /*FALLTHROUGH*/ - default: - if (cmd == cluster) { /* cluster is silent */ - md_exit(sp, -1); - } else { - usage(sp, gettext( - "unknown command")); - } - } - } - - /* check if suncluster is installed and -A enable specified */ - if (auto_take && sdssc_res != SDSSC_NOT_BOUND && - strcmp(auto_take_option, "enable") == 0) { - md_eprintf(gettext( - "cannot enable auto-take when SunCluster is installed\n")); - md_exit(sp, 1); - } - - /* - * At this point we know that if the -A enable option is specified - * for an auto-take diskset that SC is not installed on the machine, so - * all of the sdssc calls will just be no-ops. - */ - - /* list sets */ - if (cmd == notspecified && auto_take == FALSE) { - parse_printset(argc, argv); - /*NOTREACHED*/ - } - - if (meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* snarf MDDB */ - if (meta_setup_db_locations(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* - * If sname is a diskset - check for multi_node. - * It is possible for sname to not exist. - */ - if (strcmp(sname, MD_LOCAL_NAME)) { - if ((sp = metasetname(sname, ep)) != NULL) { - /* Set exists - check for MN diskset */ - if ((sd = metaget_setdesc(sp, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (MD_MNSET_DESC(sd)) { - /* - * If a MN diskset always set multi_node - * regardless of whether the -M option was - * used or not (mflag). - */ - multi_node = 1; - } else { - /* - * If a traditional diskset, mflag must - * not be set. - */ - if (mflag) { - usage(sp, gettext( - "-M option only allowed " - "on multi-owner diskset")); - } - } - } else { - /* - * Set name does not exist, set multi_node - * based on -M option. - */ - if (mflag) { - multi_node = 1; - } - } - } - - if (auto_take && multi_node) { - /* Can't mix multinode and auto-take on a diskset */ - usage(sp, - gettext("-A option not allowed on multi-owner diskset")); - } - - /* - * MN disksets don't use DCS clustering services, so - * do not get primary_node for MN diskset since no command - * proxying is done to Primary cluster node. Do not proxy - * MN diskset commands of join and withdraw when issued without - * a valid setname. - * For traditional disksets: proxy all commands except a take - * and release. Use first host listed as the host to send the - * command to if there isn't already a primary - */ - if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) && - (cmd != take) && (cmd != release) && - (cmd != cluster) && (cmd != join) && - (cmd != withdraw) && (cmd != purge)) { - stat = sdssc_get_primary_host(sname, primary_node, - SDSSC_NODE_NAME_LEN); - switch (stat) { - case SDSSC_ERROR: - return (0); - - case SDSSC_NO_SERVICE: - if (hostname != SDSSC_PROXY_PRIMARY) { - (void) strlcpy(primary_node, hostname, - SDSSC_NODE_NAME_LEN); - } else { - (void) memset(primary_node, '\0', - SDSSC_NODE_NAME_LEN); - } - break; - } - - /* - * We've got a complicated decision here regarding - * the hostname. If we didn't get a primary host - * and a host name wasn't supplied on the command line - * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise - * use what's been found. - */ - if (sdssc_cmd_proxy(argc, argv, - primary_node[0] == '\0' ? - SDSSC_PROXY_PRIMARY : primary_node, - &error) == SDSSC_PROXY_DONE) { - exit(error); - } - } - - /* cluster-specific commands */ - if (cmd == cluster) { - parse_cluster(argc, argv, multi_node); - /*NOTREACHED*/ - } - - /* join MultiNode diskset */ - if (cmd == join) { - /* - * If diskset specified, verify that it exists - * and is a multinode diskset. - */ - if (strcmp(sname, MD_LOCAL_NAME)) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (!multi_node) { - usage(sp, gettext( - "-j option only allowed on " - "multi-owner diskset")); - } - } - /* - * Start mddoors daemon here. - * mddoors itself takes care there will be only one - * instance running, so starting it twice won't hurt - */ - (void) pclose(popen("/usr/lib/lvm/mddoors", "w")); - parse_joinset(argc, argv); - /*NOTREACHED*/ - } - - /* withdraw from MultiNode diskset */ - if (cmd == withdraw) { - /* - * If diskset specified, verify that it exists - * and is a multinode diskset. - */ - if (strcmp(sname, MD_LOCAL_NAME)) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (!multi_node) { - usage(sp, gettext( - "-w option only allowed on " - "multi-owner diskset")); - } - } - parse_withdrawset(argc, argv); - /*NOTREACHED*/ - } - - /* must have set for everything else */ - if (strcmp(sname, MD_LOCAL_NAME) == 0) - usage(sp, gettext("setname must be specified")); - - /* add hosts or drives */ - if (cmd == add) { - /* - * In the multi node case start mddoors daemon. - * mddoors itself takes care there will be - * only one instance running, so starting it twice won't hurt - */ - if (multi_node) { - (void) pclose(popen("/usr/lib/lvm/mddoors", "w")); - } - - parse_add(argc, argv); - /*NOTREACHED*/ - } - - /* re-balance the replicas */ - if (cmd == balance) { - parse_balance(argc, argv); - /*NOTREACHED*/ - } - - /* delete hosts or drives */ - if (cmd == delete) { - parse_del(argc, argv); - /*NOTREACHED*/ - } - - /* check ownership */ - if (cmd == isowner) { - parse_isowner(argc, argv); - /*NOTREACHED*/ - } - - /* purge the diskset */ - if (cmd == purge) { - parse_purge(argc, argv); - /*NOTREACHED*/ - } - - /* query for data marks */ - if (cmd == query) { - parse_query(argc, argv); - /*NOTREACHED*/ - } - - /* release ownership */ - if (cmd == release) { - if (multi_node) { - /* Can't release multinode diskset */ - usage(sp, gettext( - "-r option not allowed on multi-owner diskset")); - } else { - parse_releaseset(argc, argv); - /*NOTREACHED*/ - } - } - - /* take ownership */ - if (cmd == take) { - if (multi_node) { - /* Can't take multinode diskset */ - usage(sp, gettext( - "-t option not allowed on multi-owner diskset")); - } else { - parse_takeset(argc, argv); - /*NOTREACHED*/ - } - } - - /* take ownership of auto-take sets */ - if (auto_take) { - parse_autotake(argc, argv); - /*NOTREACHED*/ - } - - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/metastat.c b/usr/src/cmd/lvm/util/metastat.c deleted file mode 100644 index 23bf4c804d69..000000000000 --- a/usr/src/cmd/lvm/util/metastat.c +++ /dev/null @@ -1,1837 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#include -#include -#include -#include - -#include -#include -#include - -/* - * print metadevice status - */ - - -#define MD_PROBE_OPEN_T "probe open test" - -/* used to keep track of the softparts on the same underlying device */ -struct sp_base_list { - struct sp_base_list *next; - char *base; -}; - -/* - * Function prototypes - */ -static void probe_all_devs(mdsetname_t *sp); - -static int print_devid(mdsetname_t *sp, mdnamelist_t *nlp, FILE *fp, - md_error_t *ep); - -static md_common_t *get_concise_unit(mdsetname_t *sp, mdname_t *np, - md_error_t *ep); -static void print_all_sets(mdprtopts_t options, int concise_flag, - int quiet_flg); -static void print_specific_set(mdsetname_t *sp, mdprtopts_t options, - int concise_flag, int quiet_flg); -static void print_concise_diskset(mdsetname_t *sp); -static void print_concise_namelist(mdsetname_t *sp, mdnamelist_t **nl, - char mtype); -static void print_concise_md(int indent, mdsetname_t *sp, mdname_t *np); -static void print_concise_mirror(int indent, mdsetname_t *sp, - md_mirror_t *mirror); -static void print_concise_raid(int indent, mdsetname_t *sp, - md_raid_t *raid); -static void print_concise_stripe(int indent, mdsetname_t *sp, - md_stripe_t *stripe); -static void print_concise_sp(int indent, mdsetname_t *sp, md_sp_t *part); -static void print_concise_trans(int indent, mdsetname_t *sp, - md_trans_t *trans); -static void free_names(mdnamelist_t **nlp); -static char *get_sm_state(md_mirror_t *mirror, int i, - md_status_t mirror_status, uint_t tstate); -static char *get_raid_col_state(md_raidcol_t *colp, uint_t tstate); -static char *get_stripe_state(md_comp_t *mdcp, uint_t tstate); -static char *get_hs_state(md_hs_t *hsp); -static struct sp_base_list *sp_add_done(md_sp_t *part, struct sp_base_list *lp); -static int sp_done(md_sp_t *part, struct sp_base_list *lp); -static int sp_match(md_sp_t *part, struct sp_base_list *lp); -static void sp_free_list(struct sp_base_list *lp); - - -/* - * print named hotspare pool or metadevice - */ -static int -print_name( - mdsetname_t **spp, - char *uname, - mdnamelist_t **nlistpp, - char *fname, - FILE *fp, - mdprtopts_t options, - int *meta_print_trans_msgp, - mdnamelist_t **lognlpp, - md_error_t *ep -) -{ - mdname_t *namep; - char *miscname; - - /* recurse */ - options |= PRINT_SUBDEVS; - - /* hotspare pool */ - if (is_existing_hsp(*spp, uname)) { - mdhspname_t *hspnamep; - - /* get hotsparepool */ - if ((hspnamep = metahspname(spp, uname, ep)) == NULL) - return (-1); - - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - /* print hotspare pool */ - return (meta_hsp_print(*spp, hspnamep, lognlpp, fname, fp, - options, ep)); - } - - /* get metadevice */ - if (((namep = metaname(spp, uname, META_DEVICE, ep)) == NULL) || - (metachkmeta(namep, ep) != 0)) - return (-1); - - /* check for ownership */ - assert(*spp != NULL); - if (meta_check_ownership(*spp, ep) != 0) - return (-1); - - if ((miscname = metagetmiscname(namep, ep)) != NULL) { - if (strcmp(miscname, MD_TRANS) == 0) { - *meta_print_trans_msgp = 1; - } - } - - /* print metadevice */ - return (meta_print_name(*spp, namep, nlistpp, fname, fp, options, - lognlpp, ep)); -} - -/* - * print the per set flags - */ -/*ARGSUSED*/ -static int -print_setstat( - mdsetname_t **spp, - char *fname, - FILE *fp, - mdprtopts_t options, - md_error_t *ep -) -{ - int rval = -1; - char *cname = NULL; - char *cp = NULL; - md_gs_stat_parm_t gsp; - - - if (fname != NULL && strchr(fname, '/') != NULL) { - /* get the canonical name */ - cname = meta_name_getname(spp, fname, META_DEVICE, ep); - if (cname == NULL) - return (-1); - Free(cname); - } - - if ((cp = getenv("MD_DEBUG")) == NULL) - return (0); - - if (strstr(cp, "SETINFO") == NULL) - return (0); - - (void) memset(&gsp, '\0', sizeof (md_gs_stat_parm_t)); - gsp.gs_setno = (*spp)->setno; - - if (metaioctl(MD_GET_SETSTAT, &gsp, &gsp.gs_mde, NULL) != 0) - return (mdstealerror(ep, &gsp.gs_mde)); - - if (fprintf(fp, "Status for set %d = ", gsp.gs_setno) == EOF) - goto out; - - if (meta_prbits(fp, NULL, gsp.gs_status, MD_SET_STAT_BITS) == EOF) - goto out; - - - if (fprintf(fp, "\n") == EOF) - goto out; - - /* success */ - rval = 0; - - /* cleanup, return error */ -out: - if (rval != 0) - (void) mdsyserror(ep, errno, fname); - - return (rval); -} - -/* - * check_replica_state: - * If the replica state is stale or the set has been halted - * this routine returns an error. - */ -static int -check_replica_state(mdsetname_t *sp, md_error_t *ep) -{ - mddb_config_t c; - - (void) memset(&c, 0, sizeof (c)); - c.c_id = 0; - c.c_setno = sp->setno; - - if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { - if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) - (void) mdstealerror(ep, &c.c_mde); - return (-1); - } - - if (c.c_flags & MDDB_C_STALE) { - return (mdmddberror(ep, MDE_DB_STALE, NODEV32, sp->setno, - 0, NULL)); - } else - return (0); -} - -static void -print_trans_msg(mdprtopts_t options, int meta_print_trans_msg) -{ - if (meta_print_trans_msg != 0) { - (void) fprintf(stderr, "\n\n"); - if (options & PRINT_SHORT) { - (void) fprintf(stderr, gettext(MD_SHORT_EOF_TRANS_MSG)); - (void) fprintf(stderr, - gettext(MD_SHORT_EOF_TRANS_WARNING)); - } else { - (void) fprintf(stderr, gettext(MD_EOF_TRANS_MSG)); - (void) fprintf(stderr, gettext(MD_EOF_TRANS_WARNING)); - } - } -} - -/* - * print usage message - * - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] [-a][-c][-B][-D][-r][-i][-p] [-t] [metadevice...]\n"), - myname); - md_exit(sp, eval); -} - -/* - * mainline. crack command line arguments. - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = MD_LOCAL_NAME; - mdsetname_t *sp = NULL; - mdprtopts_t options = PRINT_HEADER | PRINT_DEVID | PRINT_FAST; - int c; - char *p; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int eval = 0; - int inquire = 0; - int quiet_flg = 0; - int set_flg = 0; - int error; - int all_sets_flag = 0; - int concise_flag = 0; - mdnamelist_t *nlistp = NULL; - mdname_t *namep; - int devcnt = 0; - mdnamelist_t *lognlp = NULL; - uint_t hsi; - int meta_print_trans_msg = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse arguments */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "acSs:hpBDrtiq?")) != -1) { - switch (c) { - case 'a': - all_sets_flag++; - break; - - case 'c': - concise_flag++; - quiet_flg++; - break; - - case 'S': - options |= PRINT_SETSTAT_ONLY; - break; - - case 's': - sname = optarg; - set_flg++; - break; - - case 'h': - usage(sp, 0); - break; - - case 'p': - options |= PRINT_SHORT; - options &= ~PRINT_DEVID; - break; - - case 't': - options |= PRINT_TIMES; - break; - - case 'i': - inquire++; - break; - - case 'B': - options |= PRINT_LARGEDEVICES; - break; - case 'D': - options |= PRINT_FN; - break; - case 'r': /* defunct option */ - break; - case 'q': - quiet_flg++; - break; - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - argc -= optind; - argv += optind; - - if (all_sets_flag && set_flg) { - (void) fprintf(stderr, gettext("metastat: " - "incompatible options: -a and -s\n")); - usage(sp, 1); - } - - /* get set context */ - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* make sure that the mddb is not stale. Else print a warning */ - - if (check_replica_state(sp, ep)) { - if (mdismddberror(ep, MDE_DB_STALE)) { - (void) fprintf(stdout, gettext( - "****\nWARNING: Stale " - "state database replicas. Metastat output " - "may be inaccurate.\n****\n\n")); - } - } - - /* if inquire is set. We probe first */ - if (inquire) { - if (geteuid() != 0) { - (void) fprintf(stderr, gettext("metastat: -i " - "option requires super-user privilages\n")); - md_exit(sp, 1); - } - probe_all_devs(sp); - } - /* print debug stuff */ - if (((p = getenv("MD_DEBUG")) != NULL) && - (strstr(p, "STAT") != NULL)) { - options |= (PRINT_SETSTAT | PRINT_DEBUG | PRINT_TIMES); - } - - if ((options & PRINT_SETSTAT) || (options & PRINT_SETSTAT_ONLY)) { - if (print_setstat(&sp, argv[0], stdout, options, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if (options & PRINT_SETSTAT_ONLY) - md_exit(sp, 0); - } - - /* status all devices */ - if (argc == 0) { - if (all_sets_flag) { - print_all_sets(options, concise_flag, quiet_flg); - } else { - print_specific_set(sp, options, concise_flag, - quiet_flg); - } - - if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* success */ - md_exit(sp, 0); - } - /* print named device types */ - while (devcnt < argc) { - char *uname = argv[devcnt]; - char *cname = NULL; - - /* get the canonical name */ - cname = meta_name_getname(&sp, uname, META_DEVICE, ep); - if (cname == NULL) { - /* already printed the error */ - mdclrerror(ep); - eval = 1; - ++devcnt; - continue; - } - - if (concise_flag) { - mdname_t *np; - - np = metaname(&sp, cname, META_DEVICE, ep); - if (np == NULL) { - mde_perror(ep, ""); - mdclrerror(ep); - eval = 1; - } else { - print_concise_md(0, sp, np); - } - - } else { - if (print_name(&sp, cname, &nlistp, NULL, stdout, - options, &meta_print_trans_msg, &lognlp, ep) != 0) { - mde_perror(ep, ""); - mdclrerror(ep); - eval = 1; - } - } - Free(cname); - ++devcnt; - } - - /* print metadevice & relocation device id */ - if ((options & PRINT_DEVID) && (eval != 1) && !quiet_flg) { - devcnt = 0; - - while (devcnt < argc) { - char *uname = argv[devcnt]; - char *cname = NULL; - - /* get the canonical name */ - cname = meta_name_getname(&sp, uname, META_DEVICE, ep); - if (cname == NULL) { - mde_perror(ep, ""); - mdclrerror(ep); - ++devcnt; - continue; - } - - /* hotspare pools */ - if (is_existing_hsp(sp, cname)) { - mdhspname_t *hspnamep; - md_hsp_t *hsp; - - /* get hotsparepool */ - if ((hspnamep = metahspname(&sp, cname, - ep)) == NULL) - eval = 1; - - if ((hsp = meta_get_hsp(sp, hspnamep, - ep)) == NULL) - eval = 1; - - for (hsi = 0; - hsi < hsp->hotspares.hotspares_len; - hsi++) { - - namep = hsp->hotspares. - hotspares_val[hsi].hsnamep; - - if (!(options & - (PRINT_LARGEDEVICES | PRINT_FN))) { - /* meta_getdevs populates the */ - /* nlistp structure for use */ - if (meta_getdevs(sp, namep, - &nlistp, ep) != 0) - eval = 1; - } - - } - - } else { - - /* get metadevice */ - if (((namep = metaname(&sp, cname, - META_DEVICE, ep)) == NULL) || - (metachkmeta(namep, ep) != 0)) - eval = 1; - - if (!(options & - (PRINT_LARGEDEVICES | PRINT_FN))) { - /* meta_getdevs populates the */ - /* nlistp structure for use */ - if (meta_getdevs(sp, namep, &nlistp, ep) - != 0) - eval = 1; - } - } - Free(cname); - ++devcnt; - } - if (print_devid(sp, nlistp, stdout, ep) != 0) - eval = 1; - - - } - - print_trans_msg(options, meta_print_trans_msg); - - if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, eval); - /*NOTREACHED*/ - return (eval); -} - -static void -print_all_sets(mdprtopts_t options, int concise_flag, int quiet_flg) -{ - uint_t max_sets; - md_error_t error = mdnullerror; - int i; - - if ((max_sets = get_max_sets(&error)) == 0) { - return; - } - - if (!mdisok(&error)) { - mdclrerror(&error); - return; - } - - /* for each possible set number, see if we really have a diskset */ - for (i = 0; i < max_sets; i++) { - mdsetname_t *sp; - - if ((sp = metasetnosetname(i, &error)) == NULL) { - if (!mdisok(&error) && - mdisrpcerror(&error, RPC_PROGNOTREGISTERED)) { - /* metad rpc program not registered - no metasets */ - break; - } - - mdclrerror(&error); - continue; - } - mdclrerror(&error); - - if (meta_check_ownership(sp, &error) == 0) { - /* we own the set, so we can print the metadevices */ - print_specific_set(sp, options, concise_flag, - quiet_flg); - (void) printf("\n"); - } - - metaflushsetname(sp); - } -} - -static void -print_specific_set(mdsetname_t *sp, mdprtopts_t options, int concise_flag, - int quiet_flg) -{ - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int meta_print_trans_msg = 0; - - /* check for ownership */ - assert(sp != NULL); - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (concise_flag) { - print_concise_diskset(sp); - - } else { - mdnamelist_t *nlistp = NULL; - - /* status devices */ - if (meta_print_all(sp, NULL, &nlistp, stdout, options, - &meta_print_trans_msg, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* print relocation device id on all dev's */ - if ((options & PRINT_DEVID) && !quiet_flg) { - /* - * Ignore return value from meta_getalldevs since - * it will return a failure if even one device cannot - * be found - which could occur in the case of device - * failure or a device being powered off during - * upgrade. Even if meta_getalldevs fails, the - * data in nlistp is still valid. - */ - if (!(options & (PRINT_LARGEDEVICES | PRINT_FN))) { - (void) meta_getalldevs(sp, &nlistp, 0, ep); - } - if (nlistp != NULL) { - if (print_devid(sp, nlistp, stdout, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - } - } - - print_trans_msg(options, meta_print_trans_msg); -} - -/* - * print_devid prints out cxtxdx and devid for devices passed in a - * mdnamelist_t structure - */ -static int -print_devid( - mdsetname_t *sp, - mdnamelist_t *nlp, - FILE *fp, - md_error_t *ep -) -{ - int retval = 0; - mdnamelist_t *onlp = NULL; - mddevid_t *ldevidp = NULL; - mddevid_t *nextp; - - /* make a non-duplicate list of nlp */ - for (onlp = nlp; (onlp != NULL); onlp = onlp->next) { - meta_create_non_dup_list(onlp->namep, &ldevidp); - } - - retval = meta_print_devid(sp, fp, ldevidp, ep); - - /* cleanup */ - for (nextp = ldevidp; nextp != NULL; ldevidp = nextp) { - Free(ldevidp->ctdname); - nextp = ldevidp->next; - Free(ldevidp); - } - - return (retval); -} - -/* - * probedev issues ioctls for all the metadevices - */ - - - - -/* - * Failure return's a 1 - */ -int -hotspare_ok(char *bname) -{ - int fd; - char buf[512]; - - if ((fd = open(bname, O_RDONLY)) < 0) - return (0); - if (read(fd, buf, sizeof (buf)) < 0) { - (void) close(fd); - return (0); - } - (void) close(fd); - return (1); -} - -void -delete_hotspares_impl(mdsetname_t *sp, mdhspname_t *hspnp, md_hsp_t *hspp) -{ - md_hs_t *hsp; - uint_t hsi; - char *bname; - md_error_t e = mdnullerror; - int deleted_hs = 0; - - for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) { - mdnamelist_t *nlp; - - hsp = &hspp->hotspares.hotspares_val[hsi]; - bname = hsp->hsnamep->bname; - nlp = NULL; - (void) metanamelist_append(&nlp, hsp->hsnamep); - /* print hotspare */ - if (hsp->state == HSS_AVAILABLE) { - if (hotspare_ok(bname)) - continue; - - (void) fprintf(stderr, - "NOTICE: Hotspare %s in %s has failed.\n" - "\tDeleting %s since it not in use\n\n", - bname, hspnp->hspname, bname); - - if (meta_hs_delete(sp, hspnp, nlp, 0, &e) != NULL) { - mde_perror(&e, ""); - mdclrerror(&e); - } else { - deleted_hs++; - } - } - } -} - - - -/* - * Generic routine to issue ioctls - */ - -void -md_setprobetest(md_probedev_t *iocp) -{ - (void) strcpy(iocp->test_name, MD_PROBE_OPEN_T); -} - -int -md_probe_ioctl(mdsetname_t *sp, mdnamelist_t *nlp, int ndevs, char *drvname) -{ - mdnamelist_t *p; - mdname_t *np; - md_probedev_t probe_ioc, *iocp; - int i, retval = 0; - /* - * Allocate space for all the metadevices and fill in - * the minor numbers. - */ - - (void) memset(&probe_ioc, 0, sizeof (probe_ioc)); - iocp = &probe_ioc; - - if ((iocp->mnum_list = (uintptr_t)calloc(ndevs, sizeof (minor_t))) - == 0) { - perror("md_probe_ioctl: calloc"); - return (-1); - } - - MD_SETDRIVERNAME(iocp, drvname, sp->setno); - md_setprobetest(iocp); - - iocp->nmdevs = ndevs; - - for (p = nlp, i = 0; p; p = p->next, i++) { - np = p->namep; - ((minor_t *)(uintptr_t)iocp->mnum_list)[i] = - meta_getminor(np->dev); - } - - - if (metaioctl(MD_IOCPROBE_DEV, iocp, &(iocp->mde), NULL) != 0) - retval = -1; - Free((void *)(uintptr_t)iocp->mnum_list); - return (retval); -} -/* - * - * - remove p from nlp list - * - put it on the toplp list. - * - update the p to the next element - */ - -void -add_to_list(mdnamelist_t **curpp, mdnamelist_t **prevpp, mdnamelist_t **newlpp) -{ - mdnamelist_t *p, *prevp, *nlp; - - p = *curpp; - prevp = *prevpp; - nlp = *newlpp; - - if (prevp == p) { - /* if first element reset prevp */ - prevp = p->next; - p->next = nlp; - nlp = p; - p = prevp; - } else { - prevp->next = p->next; - p->next = nlp; - nlp = p; - p = prevp->next; - } - *curpp = p; - *prevpp = prevp; - *newlpp = nlp; -} -/* - * Scans the given list of metadeivces and returns a list of top level - * metadevices. - * Note: The orignal list is not valid at the end and is set to NULL. - */ - -int -get_toplevel_mds(mdsetname_t *sp, mdnamelist_t **lpp, - mdnamelist_t **top_pp) -{ - mdnamelist_t *p, *prevp, *toplp; - int ntopmd; - md_common_t *mdp; - md_error_t e = mdnullerror; - - ntopmd = 0; - prevp = p = *lpp; - toplp = NULL; - - while (p) { - if ((mdp = meta_get_unit(sp, p->namep, &e)) == NULL) { - prevp = p; - p = p->next; - continue; - } - - if (mdp->parent == MD_NO_PARENT) { - /* increment the top level md count. */ - ntopmd++; - add_to_list(&p, &prevp, &toplp); - } else { - prevp = p; - p = p->next; - } - } - *lpp = NULL; - *top_pp = toplp; - - return (ntopmd); -} - -int -get_namelist(mdnamelist_t **transdevlist, mdnamelist_t **devlist, - char *dev_type) -{ - mdnamelist_t *np, *prevp; - md_error_t e = mdnullerror; - char *type_name; - int i = 0; - - prevp = np = *transdevlist; - while (np) { - if ((type_name = metagetmiscname(np->namep, &e)) == NULL) { - *devlist = NULL; - return (-1); - } - if (strcmp(type_name, dev_type) == 0) { - /* move it to the devlist */ - add_to_list(&np, &prevp, devlist); - i++; - } else { - prevp = np; - np = np->next; - } - } - return (i); -} - - -mdnamelist_t * -create_nlp(mdsetname_t *sp) -{ - mdnamelist_t *np; - md_error_t e = mdnullerror; - - if (np = (mdnamelist_t *)malloc(sizeof (mdnamelist_t))) { - np->next = NULL; - return (np); - } else { - /* error condition below */ - mde_perror(&e, "create_nlp: malloc failed\n"); - md_exit(sp, 1); - } - return (0); -} - -/* - * Create a list of metadevices associated with trans. top_pp points to - * this list. The number of components in the list are also returned. - */ -int -create_trans_compslist(mdsetname_t *sp, mdnamelist_t **lpp, - mdnamelist_t **top_pp) -{ - mdnamelist_t *p, *tailp, *toplp, *newlp; - int ntoptrans; - md_error_t e = mdnullerror; - md_trans_t *tp; - - ntoptrans = 0; - p = *lpp; - tailp = toplp = NULL; - /* - * Scan the current list of trans devices. From that - * extract all the lower level metadevices and put them on - * toplp list. - */ - - while (p) { - if (tp = meta_get_trans(sp, p->namep, &e)) { - /* - * Check the master and log devices to see if they - * are metadevices - */ - if (metaismeta(tp->masternamep)) { - /* get a mdnamelist_t. */ - newlp = create_nlp(sp); - newlp->namep = tp->masternamep; - if (toplp == NULL) { - toplp = tailp = newlp; - } else { - tailp->next = newlp; - tailp = newlp; - } - ntoptrans++; - } - - if (tp->lognamep && metaismeta(tp->lognamep)) { - newlp = create_nlp(sp); - newlp->namep = tp->lognamep; - if (toplp == NULL) { - toplp = tailp = newlp; - } else { - tailp->next = newlp; - tailp = newlp; - } - ntoptrans++; - } - p = p->next; - } - } - *top_pp = toplp; - return (ntoptrans); -} - -void -probe_mirror_devs(mdsetname_t *sp) -{ - mdnamelist_t *nlp, *toplp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - - if (meta_get_mirror_names(sp, &nlp, 0, &e) > 0) { - /* - * We have some mirrors to probe - * get a list of top-level mirrors - */ - - cnt = get_toplevel_mds(sp, &nlp, &toplp); - if (cnt && (md_probe_ioctl(sp, toplp, cnt, MD_MIRROR) < 0)) - perror("MD_IOCPROBE_DEV"); - } else { - mdclrerror(&e); - } - metafreenamelist(nlp); - metafreenamelist(toplp); - -} - -void -probe_raid_devs(mdsetname_t *sp) -{ - mdnamelist_t *nlp, *toplp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - - if (meta_get_raid_names(sp, &nlp, 0, &e) > 0) { - /* - * We have some mirrors to probe - * get a list of top-level mirrors - */ - - cnt = get_toplevel_mds(sp, &nlp, &toplp); - - if (cnt && (md_probe_ioctl(sp, toplp, cnt, MD_RAID) < 0)) - perror("MD_IOCPROBE_DEV"); - } else { - mdclrerror(&e); - } - metafreenamelist(nlp); - metafreenamelist(toplp); -} - -/* - * Trans probes are diffenent. -- so whats new. - * we separate out the master and log device and then issue the - * probe calls. - * Since the underlying device could be disk, stripe, RAID or miror, - * we have to sort them out and then call the ioctl for each. - */ - -void -probe_trans_devs(mdsetname_t *sp) -{ - mdnamelist_t *nlp, *toplp; - mdnamelist_t *trans_raidlp, *trans_mmlp, *trans_stripelp; - int cnt; - md_error_t e = mdnullerror; - - nlp = toplp = NULL; - trans_raidlp = trans_mmlp = trans_stripelp = NULL; - - if (meta_get_trans_names(sp, &nlp, 0, &e) > 0) { - /* - * get a list of master and log metadevices. - */ - - cnt = create_trans_compslist(sp, &nlp, &toplp); - - /* underlying RAID-5 components */ - - cnt = get_namelist(&toplp, &trans_raidlp, MD_RAID); - if ((cnt > 0) && (md_probe_ioctl(sp, trans_raidlp, cnt, - MD_RAID) < 0)) - perror("MD_IOCPROBE_DEV"); - - metafreenamelist(trans_raidlp); - - /* underlying mirror components */ - - cnt = get_namelist(&toplp, &trans_mmlp, MD_MIRROR); - - if ((cnt > 0) && (md_probe_ioctl(sp, trans_mmlp, cnt, - MD_MIRROR) < 0)) - perror("MD_IOCPROBE_DEV"); - - metafreenamelist(trans_mmlp); - - /* underlying stripe components */ - - cnt = get_namelist(&toplp, &trans_stripelp, MD_STRIPE); - if ((cnt > 0) && (md_probe_ioctl(sp, trans_stripelp, cnt, - MD_STRIPE) < 0)) - perror("MD_IOCPROBE_DEV"); - metafreenamelist(trans_stripelp); - metafreenamelist(nlp); - } else { - mdclrerror(&e); - } -} - -/* - * probe hot spares. This is differs from other approaches since - * there are no read/write routines through md. We check at the physical - * component level and then delete it if its bad. - */ - -void -probe_hotspare_devs(mdsetname_t *sp) -{ - mdhspnamelist_t *hspnlp = NULL; - mdhspnamelist_t *p; - md_hsp_t *hspp; - md_error_t e = mdnullerror; - - if (meta_get_hsp_names(sp, &hspnlp, 0, &e) <= 0) { - mdclrerror(&e); - return; - } - for (p = hspnlp; (p != NULL); p = p->next) { - mdhspname_t *hspnp = p->hspnamep; - - if ((hspp = meta_get_hsp(sp, hspnp, &e)) == NULL) - continue; - - if (hspp->hotspares.hotspares_len != 0) { - delete_hotspares_impl(sp, hspnp, hspp); - } - } - metafreehspnamelist(hspnlp); - mdclrerror(&e); -} - -static void -probe_all_devs(mdsetname_t *sp) -{ - probe_hotspare_devs(sp); - probe_mirror_devs(sp); - probe_raid_devs(sp); - probe_trans_devs(sp); -} - -/* - * The following functions are used to print the concise output - * of the metastat coommand (-c option). - * - * Normally the output for metastat is performed within libmeta via - * the *_report functions within each of the metadevice specific files in - * libmeta. However, it is usually bad architecture for a library to - * perform output since there are so many different ways that an application - * can choose to do output (e.g. GUI, CLI, CIM, SNMP, etc.). So, for the - * concise output option we have moved the CLI output to the metastat - * code and just use libmeta as the source of data to be printed. - * - * This function gets all of the different top-level metadevices in the set - * and prints them. It calls the print_concise_md() function to recursively - * print the metadevices that underly the top-level metadevices. It does - * special handling for soft partitions so that all of the SPs on the - * same underlying device are grouped and then that underlying device - * is only printed once. - */ -static void -print_concise_diskset(mdsetname_t *sp) -{ - md_error_t error = mdnullerror; - mdnamelist_t *nl = NULL; - mdhspnamelist_t *hsp_list = NULL; - - /* - * We do extra handling for soft parts since we want to find - * all of the SPs on the same underlying device, group them and - * print them together before printing the underlying device just - * once. This logic doesn't apply to any other metadevice type. - */ - if (meta_get_sp_names(sp, &nl, 0, &error) >= 0) { - mdnamelist_t *nlp; - /* keep track of the softparts on the same underlying device */ - struct sp_base_list *base_list = NULL; - - for (nlp = nl; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_sp_t *soft_part; - mdnamelist_t *tnlp; - - mdn = metaname(&sp, nlp->namep->cname, - META_DEVICE, &error); - mdclrerror(&error); - if (mdn == NULL) { - print_concise_entry(0, nlp->namep->cname, - 0, 'p'); - (void) printf("\n"); - continue; - } - - soft_part = meta_get_sp_common(sp, mdn, 1, &error); - mdclrerror(&error); - - if (soft_part == NULL || - MD_HAS_PARENT(soft_part->common.parent) || - sp_done(soft_part, base_list)) - continue; - - /* print this soft part */ - print_concise_entry(0, soft_part->common.namep->cname, - soft_part->common.size, 'p'); - (void) printf(" %s\n", soft_part->compnamep->cname); - - /* - * keep track of the underlying device of - * this soft part - */ - base_list = sp_add_done(soft_part, base_list); - - /* - * now print all of the other soft parts on the same - * underlying device - */ - for (tnlp = nlp->next; tnlp != NULL; tnlp = - tnlp->next) { - md_sp_t *part; - - mdn = metaname(&sp, tnlp->namep->cname, - META_DEVICE, &error); - - mdclrerror(&error); - if (mdn == NULL) - continue; - - part = meta_get_sp_common(sp, mdn, 1, &error); - mdclrerror(&error); - - if (part == NULL || MD_HAS_PARENT( - part->common.parent) || - ! sp_match(part, base_list)) - continue; - - /* on the same base so print this soft part */ - print_concise_entry(0, - part->common.namep->cname, - part->common.size, 'p'); - (void) printf(" %s\n", part->compnamep->cname); - } - - /* - * print the common metadevice hierarchy - * under these soft parts - */ - print_concise_md(META_INDENT, sp, soft_part->compnamep); - } - - free_names(&nl); - sp_free_list(base_list); - } - mdclrerror(&error); - - if (meta_get_trans_names(sp, &nl, 0, &error) >= 0) - print_concise_namelist(sp, &nl, 't'); - mdclrerror(&error); - - if (meta_get_mirror_names(sp, &nl, 0, &error) >= 0) - print_concise_namelist(sp, &nl, 'm'); - mdclrerror(&error); - - if (meta_get_raid_names(sp, &nl, 0, &error) >= 0) - print_concise_namelist(sp, &nl, 'r'); - mdclrerror(&error); - - if (meta_get_stripe_names(sp, &nl, 0, &error) >= 0) - print_concise_namelist(sp, &nl, 's'); - mdclrerror(&error); - - if (meta_get_hsp_names(sp, &hsp_list, 0, &error) >= 0) { - mdhspnamelist_t *nlp; - - for (nlp = hsp_list; nlp != NULL; nlp = nlp->next) { - md_hsp_t *hsp; - - print_concise_entry(0, nlp->hspnamep->hspname, 0, 'h'); - - hsp = meta_get_hsp_common(sp, nlp->hspnamep, 1, &error); - mdclrerror(&error); - if (hsp != NULL) { - int i; - - for (i = 0; i < hsp->hotspares.hotspares_len; i++) { - md_hs_t *hs; - char *state; - - hs = &hsp->hotspares.hotspares_val[i]; - - (void) printf(" %s", hs->hsnamep->cname); - - state = get_hs_state(hs); - if (state != NULL) - (void) printf(" (%s)", state); - } - } - - (void) printf("\n"); - } - - mdclrerror(&error); - metafreehspnamelist(hsp_list); - } -} - -/* - * Print the top-level metadevices in the name list for concise output. - */ -static void -print_concise_namelist(mdsetname_t *sp, mdnamelist_t **nl, char mtype) -{ - mdnamelist_t *nlp; - md_error_t error = mdnullerror; - - for (nlp = *nl; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_common_t *u; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, &error); - mdclrerror(&error); - if (mdn == NULL) { - print_concise_entry(0, nlp->namep->cname, 0, mtype); - (void) printf("\n"); - continue; - } - - u = get_concise_unit(sp, mdn, &error); - mdclrerror(&error); - - if (u != NULL && !MD_HAS_PARENT(u->parent)) - print_concise_md(0, sp, mdn); - } - - free_names(nl); -} - -/* - * Concise mirror output. - */ -static void -print_concise_mirror(int indent, mdsetname_t *sp, md_mirror_t *mirror) -{ - md_error_t error = mdnullerror; - int i; - md_status_t status = mirror->common.state; - - if (mirror == NULL) - return; - - print_concise_entry(indent, mirror->common.namep->cname, - mirror->common.size, 'm'); - - for (i = 0; i < NMIRROR; i++) { - uint_t tstate = 0; - char *state; - - if (mirror->submirrors[i].submirnamep == NULL) - continue; - (void) printf(" %s", mirror->submirrors[i].submirnamep->cname); - - if (mirror->submirrors[i].state & SMS_OFFLINE) { - (void) printf(gettext(" (offline)")); - continue; - } - - if (metaismeta(mirror->submirrors[i].submirnamep)) - (void) meta_get_tstate( - mirror->submirrors[i].submirnamep->dev, - &tstate, &error); - - mdclrerror(&error); - state = get_sm_state(mirror, i, status, tstate); - if (state != NULL) - (void) printf(" (%s)", state); - } - - (void) printf("\n"); - - indent += META_INDENT; - for (i = 0; i < NMIRROR; i++) { - if (mirror->submirrors[i].submirnamep == NULL) - continue; - - print_concise_md(indent, sp, mirror->submirrors[i].submirnamep); - } -} - -/* - * Concise raid output. - */ -static void -print_concise_raid(int indent, mdsetname_t *sp, md_raid_t *raid) -{ - md_error_t error = mdnullerror; - int i; - uint_t tstate = 0; - - if (raid == NULL) - return; - - print_concise_entry(indent, raid->common.namep->cname, - raid->common.size, 'r'); - - if (metaismeta(raid->common.namep)) - (void) meta_get_tstate(raid->common.namep->dev, - &tstate, &error); - - for (i = 0; i < raid->cols.cols_len; i++) { - md_raidcol_t *colp = &raid->cols.cols_val[i]; - mdname_t *namep = ((colp->hsnamep != NULL) ? - colp->hsnamep : colp->colnamep); - char *hsname = ((colp->hsnamep != NULL) ? - colp->hsnamep->cname : NULL); - char *col_state = NULL; - - (void) printf(" %s", colp->colnamep->cname); - - if (metaismeta(namep)) { - uint_t tstate = 0; - - (void) meta_get_tstate(namep->dev, &tstate, &error); - mdclrerror(&error); - col_state = get_raid_col_state(colp, tstate); - - } else { - if (tstate != 0) - col_state = "-"; - else - col_state = get_raid_col_state(colp, tstate); - } - - if (col_state != NULL) { - if (hsname != NULL) - (void) printf(" (%s-%s)", col_state, hsname); - else - (void) printf(" (%s)", col_state); - - } else if (hsname != NULL) { - (void) printf(gettext(" (spared-%s)"), hsname); - } - } - - (void) printf("\n"); - - indent += META_INDENT; - for (i = 0; i < raid->cols.cols_len; i++) { - print_concise_md(indent, sp, raid->cols.cols_val[i].colnamep); - } -} - -/* - * Concise stripe output. - */ -static void -print_concise_stripe(int indent, mdsetname_t *sp, md_stripe_t *stripe) -{ - md_error_t error = mdnullerror; - int i; - uint_t top_tstate = 0; - - if (stripe == NULL) - return; - - print_concise_entry(indent, stripe->common.namep->cname, - stripe->common.size, 's'); - - if (metaismeta(stripe->common.namep)) - (void) meta_get_tstate(stripe->common.namep->dev, &top_tstate, - &error); - mdclrerror(&error); - - for (i = 0; i < stripe->rows.rows_len; i++) { - md_row_t *rowp; - int j; - - rowp = &stripe->rows.rows_val[i]; - - for (j = 0; j < rowp->comps.comps_len; j++) { - md_comp_t *comp; - uint_t tstate = 0; - char *comp_state = NULL; - char *hsname; - - comp = &rowp->comps.comps_val[j]; - (void) printf(" %s", comp->compnamep->cname); - - if (metaismeta(comp->compnamep)) { - uint_t tstate = 0; - (void) meta_get_tstate(comp->compnamep->dev, - &tstate, &error); - mdclrerror(&error); - comp_state = get_stripe_state(comp, tstate); - } else { - if (top_tstate != 0) - comp_state = "-"; - else - comp_state = get_stripe_state(comp, tstate); - } - - hsname = ((comp->hsnamep != NULL) ? - comp->hsnamep->cname : NULL); - - if (comp_state != NULL) { - if (hsname != NULL) - (void) printf(" (%s-%s)", - comp_state, hsname); - else - (void) printf(" (%s)", comp_state); - - } else if (hsname != NULL) { - (void) printf(gettext(" (spared-%s)"), hsname); - } - } - } - - (void) printf("\n"); - - indent += META_INDENT; - for (i = 0; i < stripe->rows.rows_len; i++) { - md_row_t *rowp; - int j; - - rowp = &stripe->rows.rows_val[i]; - - for (j = 0; j < rowp->comps.comps_len; j++) { - print_concise_md(indent, sp, - rowp->comps.comps_val[j].compnamep); - } - } -} - -/* - * Concise soft partition output. - */ -static void -print_concise_sp(int indent, mdsetname_t *sp, md_sp_t *part) -{ - if (part == NULL) - return; - - print_concise_entry(indent, part->common.namep->cname, - part->common.size, 'p'); - - (void) printf(" %s\n", part->compnamep->cname); - - print_concise_md(indent + META_INDENT, sp, part->compnamep); -} - -/* - * Concise trans output. - */ -static void -print_concise_trans(int indent, mdsetname_t *sp, md_trans_t *trans) -{ - if (trans == NULL) - return; - - print_concise_entry(indent, trans->common.namep->cname, - trans->common.size, 't'); - - if (trans->masternamep != NULL) - (void) printf(" %s", trans->masternamep->cname); - - if (trans->lognamep != NULL) - (void) printf(" %s", trans->lognamep->cname); - - (void) printf("\n"); - - indent += META_INDENT; - - print_concise_md(indent, sp, trans->masternamep); - - print_concise_md(indent, sp, trans->lognamep); -} - -/* - * Recursive function for concise metadevice nested output. - */ -static void -print_concise_md(int indent, mdsetname_t *sp, mdname_t *np) -{ - md_error_t error = mdnullerror; - md_unit_t *u; - md_mirror_t *mirror; - md_raid_t *raid; - md_sp_t *soft_part; - md_stripe_t *stripe; - md_trans_t *trans; - - if (np == NULL || !metaismeta(np)) - return; - - if ((u = meta_get_mdunit(sp, np, &error)) == NULL) { - mdclrerror(&error); - return; - } - - switch (u->c.un_type) { - case MD_DEVICE: - stripe = meta_get_stripe_common(sp, np, 1, &error); - print_concise_stripe(indent, sp, stripe); - break; - - case MD_METAMIRROR: - mirror = meta_get_mirror(sp, np, &error); - print_concise_mirror(indent, sp, mirror); - break; - - case MD_METATRANS: - trans = meta_get_trans_common(sp, np, 1, &error); - print_concise_trans(indent, sp, trans); - break; - - case MD_METARAID: - raid = meta_get_raid_common(sp, np, 1, &error); - print_concise_raid(indent, sp, raid); - break; - - case MD_METASP: - soft_part = meta_get_sp_common(sp, np, 1, &error); - print_concise_sp(indent, sp, soft_part); - break; - - default: - return; - } - mdclrerror(&error); -} - -/* - * Given a name get the unit for use in concise output. We use the *_common - * routines in libmeta which allow us to specify the "fast" flag, thereby - * avoiding the DKIOCGGEOM ioctl that normally happens. - */ -static md_common_t * -get_concise_unit(mdsetname_t *sp, mdname_t *np, md_error_t *ep) -{ - char *miscname; - - /* short circuit */ - if (np->drivenamep->unitp != NULL) - return (np->drivenamep->unitp); - if (metachkmeta(np, ep) != 0) - return (NULL); - - /* dispatch */ - if ((miscname = metagetmiscname(np, ep)) == NULL) - return (NULL); - else if (strcmp(miscname, MD_STRIPE) == 0) - return ((md_common_t *)meta_get_stripe_common(sp, np, 1, ep)); - else if (strcmp(miscname, MD_MIRROR) == 0) - return ((md_common_t *)meta_get_mirror(sp, np, ep)); - else if (strcmp(miscname, MD_TRANS) == 0) - return ((md_common_t *)meta_get_trans_common(sp, np, 1, ep)); - else if (strcmp(miscname, MD_RAID) == 0) - return ((md_common_t *)meta_get_raid_common(sp, np, 1, ep)); - else if (strcmp(miscname, MD_SP) == 0) - return ((md_common_t *)meta_get_sp_common(sp, np, 1, ep)); - else { - (void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev), - np->cname); - return (NULL); - } -} - -static void -free_names(mdnamelist_t **nlp) -{ - mdnamelist_t *p; - - for (p = *nlp; p != NULL; p = p->next) { - meta_invalidate_name(p->namep); - p->namep = NULL; - } - metafreenamelist(*nlp); - *nlp = NULL; -} - -/* - * Submirror state for concise output. - */ -static char * -get_sm_state(md_mirror_t *mirror, int i, md_status_t mirror_status, - uint_t tstate) -{ - sm_state_t state = mirror->submirrors[i].state; - uint_t is_target = - mirror->submirrors[i].flags & MD_SM_RESYNC_TARGET; - - /* - * Only return Unavailable if there is no flagged error on the - * submirror. If the mirror has received any writes since the submirror - * went into Unavailable state a resync is required. To alert the - * administrator to this we return a 'Needs maintenance' message. - */ - if ((tstate != 0) && (state & SMS_RUNNING)) - return (gettext("unavail")); - - /* all is well */ - if (state & SMS_RUNNING) { - if (!(mirror_status & MD_UN_OPT_NOT_DONE) || - ((mirror_status & MD_UN_OPT_NOT_DONE) && !is_target)) - return (NULL); - } - - /* resyncing, needs repair */ - if ((state & (SMS_COMP_RESYNC | SMS_ATTACHED_RESYNC | - SMS_OFFLINE_RESYNC)) || (mirror_status & MD_UN_OPT_NOT_DONE)) { - static char buf[MAXPATHLEN]; - - if (mirror_status & MD_UN_RESYNC_ACTIVE) { - - if (mirror->common.revision & MD_64BIT_META_DEV) { - (void) snprintf(buf, sizeof (buf), - gettext("resync-%2d.%1d%%"), - mirror->percent_done / 10, - mirror->percent_done % 10); - } else { - (void) snprintf(buf, sizeof (buf), - gettext("resync-%d%%"), mirror->percent_done); - } - return (buf); - } - return (gettext("maint")); - } - - /* needs repair */ - if (state & (SMS_COMP_ERRED | SMS_ATTACHED | SMS_OFFLINE)) - return (gettext("maint")); - - /* unknown */ - return (gettext("unknown")); -} - -/* - * Raid component state for concise output. - */ -static char * -get_raid_col_state(md_raidcol_t *colp, uint_t tstate) -{ - if (tstate != 0) - return (gettext("unavail")); - - return (meta_get_raid_col_state(colp->state)); -} - -/* - * Stripe state for concise output. - */ -static char * -get_stripe_state(md_comp_t *mdcp, uint_t tstate) -{ - comp_state_t state = mdcp->state; - - if (tstate != 0) - return ("unavail"); - - return (meta_get_stripe_state(state)); -} - -/* - * Hostspare state for concise output. - */ -static char * -get_hs_state(md_hs_t *hsp) -{ - hotspare_states_t state = hsp->state; - - return (meta_get_hs_state(state)); -} - - -/* - * Keep track of printed soft partitions for concise output. - */ -static struct sp_base_list * -sp_add_done(md_sp_t *part, struct sp_base_list *lp) -{ - struct sp_base_list *n; - - n = (struct sp_base_list *)malloc(sizeof (struct sp_base_list)); - if (n == NULL) - return (lp); - - if ((n->base = strdup(part->compnamep->cname)) == NULL) { - free(n); - return (lp); - } - - n->next = lp; - - return (n); -} - -/* - * Keep track of printed soft partitions for concise output. - */ -static int -sp_done(md_sp_t *part, struct sp_base_list *lp) -{ - for (; lp != NULL; lp = lp->next) { - if (strcmp(lp->base, part->compnamep->cname) == 0) - return (1); - } - - return (0); -} - -/* - * Check the first element for a match. - */ -static int -sp_match(md_sp_t *part, struct sp_base_list *lp) -{ - if (lp != NULL && strcmp(lp->base, part->compnamep->cname) == 0) - return (1); - - return (0); -} - -/* - * Free memory used for soft partition printed status in concise output. - */ -static void -sp_free_list(struct sp_base_list *lp) -{ - struct sp_base_list *n; - - for (; lp != NULL; lp = n) { - n = lp->next; - free(lp->base); - free(lp); - } -} diff --git a/usr/src/cmd/lvm/util/metasync.c b/usr/src/cmd/lvm/util/metasync.c deleted file mode 100644 index 4d36d35cf604..000000000000 --- a/usr/src/cmd/lvm/util/metasync.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * sync metadevices - */ - -#include - -#include - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] -r [buffer_size]\n\ - %s [-s setname] [buffer_size] metadevices...\n\ - %s [-s setname] -c metadevices...\n"), - myname, myname, myname); - md_exit(sp, eval); -} - -/* - * crack command line arguments. - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - int rflag = 0; - int pflag = 0; - daddr_t size = 0; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int rval = 0; - int error; - md_resync_cmd_t resync_cmd = MD_RESYNC_START; - bool_t called_thru_rpc = FALSE; - char *cp; - int mn_set = FALSE; - int cflag = 0; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - - /* initialize */ - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "phs:rc?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 's': - sname = optarg; - break; - - case 'r': - ++rflag; - break; - - case 'p': - ++pflag; - break; - - case 'c': - ++cflag; - resync_cmd = MD_RESYNC_KILL; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - /*FALLTHROUGH*/ - default: - usage(sp, 1); - break; - } - } - if ((pflag + rflag) > 1) { - usage(sp, 1); - mde_perror(ep, ""); - md_exit(sp, 1); - } - argc -= optind; - argv += optind; - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - /* - * look for buffer size. If one is not specified we pass '0' to - * the meta_resync_all() call. This uses whatever size has been - * configured via md_mirror:md_resync_bufsz - * The default value (if not overridden in /etc/system) is - * MD_DEF_RESYNC_BUF_SIZE - */ - if ((argc > 0) && (isdigit(argv[0][0]))) { - if ((size = atoi(argv[0])) < 0) { - md_eprintf(gettext( - "illegal buffer size %s\n"), - argv[0]); - md_exit(sp, 1); - } - --argc; - ++argv; - } - - /* sync all devices in set */ - if (rflag) { - /* get set */ - if (argc != 0) - usage(sp, 1); - if ((sp == NULL) && - ((sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) && - (metaget_setdesc(sp, ep) == NULL)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - assert(sp != NULL); - /* - * For a MN set "metasync -r" can only be called by the - * initiator. We must not take the set lock for a MN set as - * it will only generate individual metasync commands which - * will individually take the lock when executing the - * individual metasync commands. - * Therefore only take the set lock for non MN sets. - */ - if (meta_is_mn_set(sp, ep) == 0) { - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - /* resync all metadevices in set */ - if (meta_resync_all(sp, size, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - md_exit(sp, 0); - } - - /* sync specified metadevices */ - if (argc <= 0) - usage(sp, 1); - - /* - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[0] which is the metadevice arg - */ - if (meta_is_mn_name(&sp, argv[0], ep)) - mn_set = TRUE; - - for (; (argc > 0); --argc, ++argv) { - mdname_t *np; - int result; - - /* get device */ - if ((np = metaname(&sp, argv[0], META_DEVICE, ep)) == NULL) { - mde_perror(ep, ""); - rval = -1; - continue; - } - assert(sp != NULL); - - /* - * If we are not called through an rpc call and the - * set associated with the command is an MN set, send - * a setsync message to the master of the set and let it - * deal with it. - */ - if (!called_thru_rpc && mn_set) { - if ((result = meta_mn_send_setsync(sp, np, size, - ep)) != 0) { - mde_perror(ep, "Unable to start resync"); - md_exit(sp, result); - } - continue; - } - - /* grab set lock */ - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* check for ownership */ - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); /* no point in continuing */ - } - - /* resync or regen (raid only) metadevice */ - if (pflag) { - /* regen */ - if (meta_raid_regen_byname(sp, np, size, ep) != 0) { - mde_perror(ep, ""); - rval = -1; - continue; - } - } else { - if (meta_resync_byname(sp, np, size, ep, resync_cmd) - != 0) { - mde_perror(ep, ""); - rval = -1; - continue; - } - } - } - - /* return success */ - md_exit(sp, rval); - /*NOTREACHED*/ - return (rval); -} diff --git a/usr/src/cmd/lvm/util/metasync.xml b/usr/src/cmd/lvm/util/metasync.xml deleted file mode 100644 index e9b501addc31..000000000000 --- a/usr/src/cmd/lvm/util/metasync.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/usr/src/cmd/lvm/util/metattach.c b/usr/src/cmd/lvm/util/metattach.c deleted file mode 100644 index 8bd955105348..000000000000 --- a/usr/src/cmd/lvm/util/metattach.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * attach submirrors - */ - -#include - -#include - -/* - * print usage message - */ -static void -usage( - mdsetname_t *sp, - int eval -) -{ - (void) fprintf(stderr, gettext("\ -usage: %s [-s setname] mirror [metadevice]\n\ - %s [-s setname] [-i interlace] concat/stripe component...\n\ - %s [-s setname] RAID component...\n\ - %s [-s setname] [-A alignment] softpart size|all\n"), - myname, myname, myname, myname); - md_exit(sp, eval); -} - -/* - * attach more space to a soft partition - */ -static int -sp_attach( - mdsetname_t **spp, - mdname_t *spnp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - int c; - sp_ext_offset_t alignment = 0; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "ns:A:")) != -1) { - switch (c) { - case 'n': - case 's': - break; - case 'A': - if (meta_sp_parsesize(optarg, &alignment) == -1) { - usage(*spp, 1); - /* NOTREACHED */ - } - break; - default: - usage(*spp, 1); - /* NOTREACHED */ - break; - } - } - argc -= optind + 1; - argv += optind + 1; - - if (argc != 1) - usage(*spp, 1); - - if (meta_sp_attach(*spp, spnp, argv[0], options, alignment, ep) != 0) { - return (-1); - } - - /* update md.cf file */ - if (meta_update_md_cf(*spp, ep) != 0) - return (-1); - - return (0); -} -/* - * attach components to stripe - */ -static int -stripe_attach( - mdsetname_t **spp, - mdname_t *stripenp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - diskaddr_t interlace = 0; - int c; - mdnamelist_t *compnlp = NULL; - mdnamelist_t *p; - mdname_t *currootnp; - md_stripe_t *stripep; - md_row_t *rp; - md_comp_t *cp; - - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:ani:")) != -1) { - switch (c) { - case 'n': - case 's': - break; - - case 'a': - break; /* obsolete */ - - case 'i': - if (parse_interlace(stripenp->cname, optarg, - &interlace, ep) != 0) { - return (-1); - } - if (meta_stripe_check_interlace(interlace, - stripenp->cname, ep)) - return (-1); - break; - - default: - usage(*spp, 1); - /*NOTREACHED*/ - break; - } - } - - argc -= optind + 1; - argv += optind + 1; - - if (argc <= 0) - usage(*spp, 1); - - /* get list of components */ - if (metanamelist(spp, &compnlp, argc, argv, - UNKNOWN, ep) < 0) - return (-1); - assert(compnlp != NULL); - for (p = compnlp; (p != NULL); p = p->next) { - mdname_t *compnp = p->namep; - - /* see if we are a soft partition */ - if (meta_sp_issp(*spp, compnp, ep) != 0) { - /* nope, check component */ - if (metachkcomp(compnp, ep) != 0) - return (-1); - } - } - - /* get root device */ - if ((currootnp = meta_get_current_root_dev(*spp, ep)) != NULL) { - /* - * Root is either a stripe or a slice - * If root device is the 1st component of the stripe - * Then fail as root cannot be expanded - */ - if ((stripep = meta_get_stripe(*spp, stripenp, ep)) == NULL) - return (-1); - - rp = &stripep->rows.rows_val[0]; - cp = &rp->comps.comps_val[0]; - if (metachkcomp(cp->compnamep, ep) == 0) { - /* Component is a disk */ - if (strcmp(currootnp->cname, - cp->compnamep->cname) == 0) { - md_eprintf(gettext( - "%s: volume mounted as root cannot be " - "expanded\n"), stripenp->cname); - md_exit(*spp, 1); - } - } - } - - /* attach components */ - if (meta_stripe_attach(*spp, stripenp, compnlp, interlace, options, - ep) != 0) { - return (-1); - } - - /* update md.cf file */ - if (meta_update_md_cf(*spp, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * attach components to raid - */ -static int -raid_attach( - mdsetname_t **spp, - mdname_t *raidnp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - int c; - mdnamelist_t *compnlp = NULL; - mdnamelist_t *p; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "s:ai:")) != -1) { - switch (c) { - case 'n': - case 's': - break; - - case 'a': - break; /* obsolete */ - - default: - usage(*spp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind + 1; - argv += optind + 1; - if (argc <= 0) - usage(*spp, 1); - - /* get list of components */ - if (metanamelist(spp, &compnlp, argc, argv, - UNKNOWN, ep) < 0) - return (-1); - assert(compnlp != NULL); - for (p = compnlp; (p != NULL); p = p->next) { - mdname_t *compnp = p->namep; - - /* check for soft partitions */ - if (meta_sp_issp(*spp, compnp, ep) != 0) { - /* check disk */ - if (metachkcomp(compnp, ep) != 0) - return (-1); - } - } - - /* attach components */ - if (meta_raid_attach(*spp, raidnp, compnlp, options, ep) != 0) - return (-1); - - /* update md.cf file */ - if (meta_update_md_cf(*spp, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * attach submirror to mirror - */ -static int -mirror_attach( - mdsetname_t **spp, - mdname_t *mirnp, - int argc, - char *argv[], - mdcmdopts_t options, - md_error_t *ep -) -{ - int c; - mdname_t *submirnp; - - /* reset and parse args */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "ns:")) != -1) { - switch (c) { - case 'n': - case 's': - break; - - default: - usage(*spp, 1); - /*NOTREACHED*/ - break; - } - } - argc -= optind + 1; - argv += optind + 1; - - /* get submirror */ - if (argc == 1) { - if (((submirnp = metaname(spp, argv[0], META_DEVICE, - ep)) == NULL) || - (metachkmeta(submirnp, ep) != 0)) { - return (-1); - } - } else if (argc == 0) { - submirnp = NULL; - } else { - usage(*spp, 1); - } - - /* attach submirror */ - if (meta_mirror_attach(*spp, mirnp, submirnp, options, ep) != 0) - return (-1); - - /* update md.cf file */ - if (meta_update_md_cf(*spp, ep) != 0) - return (-1); - - /* return success */ - return (0); -} - -/* - * attach devices - */ -int -main( - int argc, - char *argv[] -) -{ - char *sname = NULL; - mdsetname_t *sp = NULL; - mdcmdopts_t options = (MDCMD_PRINT|MDCMD_DOIT); - mdname_t *np; - char *miscname; - int c; - md_error_t status = mdnullerror; - md_error_t *ep = &status; - int error; - bool_t called_thru_rpc = FALSE; - char *cp; - - /* - * Get the locale set up before calling any other routines - * with messages to ouput. Just in case we're not in a build - * environment, make sure that TEXT_DOMAIN gets set to - * something. - */ -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - /* initialize */ - if ((cp = strstr(argv[0], ".rpc_call")) == NULL) { - if (sdssc_bind_library() == SDSSC_OKAY) - if (sdssc_cmd_proxy(argc, argv, SDSSC_PROXY_PRIMARY, - &error) == SDSSC_PROXY_DONE) - exit(error); - } else { - *cp = '\0'; /* cut off ".rpc_call" */ - called_thru_rpc = TRUE; - } - - if (md_init(argc, argv, 0, 1, ep) != 0 || - meta_check_root(ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* find set and metadevice first */ - optind = 1; - opterr = 1; - while ((c = getopt(argc, argv, "hns:A:ai:?")) != -1) { - switch (c) { - case 'h': - usage(sp, 0); - break; - - case 'n': - if (called_thru_rpc == TRUE) { - options &= ~MDCMD_DOIT; - } else { - usage(sp, 1); - } - break; - - case 's': - sname = optarg; - break; - - case '?': - if (optopt == '?') - usage(sp, 0); - break; - } - } - if ((argc - optind) <= 0) - usage(sp, 1); - - if (sname != NULL) { - if ((sp = metasetname(sname, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } - - if (((np = metaname(&sp, argv[optind], META_DEVICE, ep)) == NULL) || - (metachkmeta(np, ep) != 0)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - assert(sp != NULL); - - if ((called_thru_rpc == FALSE) && - meta_is_mn_name(&sp, argv[optind], ep)) { - /* - * If we are dealing with a MN set and we were not - * called thru an rpc call, we are just to send this - * command string to the master of the set and let it - * deal with it. - * Note that if sp is NULL, meta_is_mn_name() derives sp - * from argv[optind] which is the metadevice arg - */ - int i; - int newargc; - int result; - char **newargv; - - if ((miscname = metagetmiscname(np, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - newargv = calloc(argc+1, sizeof (char *)); - newargv[0] = "metattach"; - newargv[1] = "-n"; /* always do "-n" first */ - newargc = 2; - for (i = 1; i < argc; i++, newargc++) - newargv[newargc] = argv[i]; - - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_DRYRUN, NO_CONTEXT_STRING, ep); - - /* If we found a problem don't do it for real */ - if (result != 0) { - md_exit(sp, result); - } - - /* - * Do it for real now. Remove "-n" from the arguments and - * MD_DRYRUN from the flags. If we fail now, the master must - * panic as the mddbs may be inconsistent. - */ - newargv[1] = ""; /* this was "-n" before */ - result = meta_mn_send_command(sp, newargc, newargv, - MD_DISP_STDERR | MD_RETRY_BUSY | MD_PANIC_WHEN_INCONSISTENT, - NO_CONTEXT_STRING, ep); - - free(newargv); - - /* - * If the metattach command succeeds, for a mirror, send a - * resync starting message for the metadevice - */ - if ((result == 0) && (strcmp(miscname, MD_MIRROR) == 0)) - if ((result = meta_mn_send_resync_starting(np, ep)) - != 0) - mde_perror(ep, "Unable to start resync"); - md_exit(sp, result); - } - - if (meta_lock(sp, TRUE, ep)) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - if (meta_check_ownership(sp, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - if ((miscname = metagetmiscname(np, ep)) == NULL) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - - /* dispatch based on device type */ - if (strcmp(miscname, MD_STRIPE) == 0) { - if (stripe_attach(&sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_RAID) == 0) { - if (raid_attach(&sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_MIRROR) == 0) { - if (mirror_attach(&sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else if (strcmp(miscname, MD_TRANS) == 0) { - md_eprintf(gettext(MD_EOF_TRANS_MSG)); - md_exit(sp, 1); - } else if (strcmp(miscname, MD_SP) == 0) { - if (sp_attach(&sp, np, argc, argv, options, ep) != 0) { - mde_perror(ep, ""); - md_exit(sp, 1); - } - } else { - md_eprintf(gettext( - "%s: invalid metadevice type %s\n"), - np->cname, miscname); - md_exit(sp, 1); - } - - /* return success */ - md_exit(sp, 0); - /*NOTREACHED*/ - return (0); -} diff --git a/usr/src/cmd/lvm/util/sparc/Makefile b/usr/src/cmd/lvm/util/sparc/Makefile deleted file mode 100644 index ae48efee01cf..000000000000 --- a/usr/src/cmd/lvm/util/sparc/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 1996-2002 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Makefile for logical volume management -# -# cmd/lvm/util/sparc/Makefile - -include ../Makefile.com diff --git a/usr/src/cmd/lvm/util/svc-metainit b/usr/src/cmd/lvm/util/svc-metainit deleted file mode 100644 index 12536428e869..000000000000 --- a/usr/src/cmd/lvm/util/svc-metainit +++ /dev/null @@ -1,75 +0,0 @@ -#!/sbin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Metadisk initialization. - -METAINIT=/sbin/metainit -METADEV=/dev/md/admin - -. /lib/svc/share/smf_include.sh - -if [ ! -s /kernel/drv/md.conf ]; then - echo "/kernel/drv/md.conf is missing or empty." - exit 0 -fi - -if [ ! -c $METADEV ]; then - echo "$METADEV is missing or not a character device." - exit 0 -fi - -$METAINIT -r -error=$? -case "$error" in - 0|1) exit 0 - ;; - - 66) -echo "Insufficient metadevice database replicas located." -echo -echo "Use metadb to delete databases which are broken." -echo "Ignore any "Read-only file system" error messages." -echo "Reboot the system when finished to reload the metadevice database." -echo "After reboot, repair any broken database replicas which were deleted." - -echo "Insufficient metadevice database replicas located." >/dev/console -echo >/dev/console -echo "Use metadb to delete databases which are broken." >/dev/console -echo "Ignore any "Read-only file system" error messages." >/dev/console -echo "Reboot the system when finished to reload the metadevice database." \ ->/dev/console -echo "After reboot, repair any broken database replicas which were deleted." \ ->/dev/console - - exit $SMF_EXIT_ERR_CONFIG - ;; - - *) echo "Unknown $METAINIT -r failure $error." - exit 1 - ;; -esac diff --git a/usr/src/cmd/lvm/util/svc-metasync b/usr/src/cmd/lvm/util/svc-metasync deleted file mode 100644 index eeadffdbc703..000000000000 --- a/usr/src/cmd/lvm/util/svc-metasync +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# Start mirror resync threads. - -DEVFSADM=/usr/sbin/devfsadm -METADEVADM=/usr/sbin/metadevadm -METASYNC=/usr/sbin/metasync -METADEV=/dev/md/admin -METASET=/usr/sbin/metaset -TMPFILE=/var/run/metaset.$$ - -. /lib/svc/share/smf_include.sh - -print_verbose() -{ - echo "Unable to resolve unnamed devices for volume management." - echo "Please refer to the Solaris Volume Manager documentation," - echo "Troubleshooting section, at http://docs.sun.com or from" - echo "your local copy." -} - -resolve_auto_take_sets() -{ - if [ -x $METASET ]; then - # Fixing up of the ctd names for devices in auto take - # sets relies heavily on the output of the metaset - # command. Any change to the output of the metaset command - # should modify this script as well in order ensure nothing - # breaks - # - # The following command saves all of the auto-take set names - # into the TMPFILE - name_str=`gettext "Set name"` - mn_str=`gettext "Multi-owner"` - $METASET | /bin/nawk -F ' |\t|,' -v snm="$name_str" \ - -v mstr="$mn_str" '$0 ~ snm { \ - if (index($0, mstr) == 0) print $4 \ - }' > $TMPFILE 2>&1 - - if [ -s "$TMPFILE" ]; then - localised_string=`gettext "Yes (auto)"` - for i in `cat $TMPFILE`; do - $METASET -s $i | grep "$localised_string" \ - > /dev/null 2>&1 - if [ $? -eq 0 ]; then - $METADEVADM -l -r -s $i - error=$? - case $error in - 0|2) ;; - 3) print_verbose - ;; - *) echo "$METADEVADM \ - -r failure $error." - ;; - esac - fi - done - fi - if [ -f "$TMPFILE" ]; then - /usr/bin/rm -f $TMPFILE - fi - fi -} - -if [ ! -s /kernel/drv/md.conf ]; then - echo "/kernel/drv/md.conf is missing." - exit 0 -fi - -if grep '^mddb_bootlist' /kernel/drv/md.conf >/dev/null 2>&1; then :; else - echo "No 'mddb_bootlist' entry in /kernel/drv/md.conf." - exit 0 -fi - -if [ ! -x $METADEVADM ]; then - echo "$METADEVADM is missing or not executable." - exit $SMF_EXIT_ERR_CONFIG -fi - -if [ ! -x $METASYNC ]; then - echo "$METASYNC is missing or not executable." - exit $SMF_EXIT_ERR_CONFIG -fi - -if [ ! -c $METADEV ]; then - echo "$METADEV is missing or not a character device." - exit 0 -fi - -$METADEVADM -l -r -error=$? -case $error in -0|2) ;; - -3) echo "Executing devfsadm" - $DEVFSADM - devfsadmerror=$? - if [ $devfsadmerror = 0 ]; then - echo "Executing metadevadm -r" - $METADEVADM -l -r - error=$? - fi - if [ $devfsadmerror != 0 -o $error = 3 ]; then - print_verbose - elif [ $error != 0 -a $error != 2 ]; then - echo "$METADEVADM -r failure $error." - fi - ;; - -*) echo "$METADEVADM -r failure $error." - exit 1 - ;; -esac - -resolve_auto_take_sets - -$METASYNC -r -error=$? -case $error in -0) ;; - -*) echo "Unknown $METASYNC -r failure $error." - exit 1 - ;; -esac - diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common index ebdeac9f3aa2..dcff6b389812 100644 --- a/usr/src/cmd/mdb/Makefile.common +++ b/usr/src/cmd/mdb/Makefile.common @@ -17,10 +17,14 @@ # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END +# + # # Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2013 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Joyent, Inc. +# Copyright 2016 Nexenta Systems, Inc. +# + # # MDB modules used for debugging user processes that every ISA's build # subdirectory will need to build. @@ -74,7 +78,6 @@ COMMON_MODULES_KVM = \ lofs \ logindmux \ mac \ - md \ mm \ mpt_sas \ mr_sas \ diff --git a/usr/src/cmd/mdb/common/modules/md/dumphotspare.c b/usr/src/cmd/mdb/common/modules/md/dumphotspare.c deleted file mode 100644 index 413ed79844ab..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/dumphotspare.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -static void -printhsp(hot_spare_pool_t hsp, uintptr_t hsp_addr) -{ - int i = 0; - uintptr_t hs_addr; - int recid; - - mdb_inc_indent(2); - mdb_printf("hsp_next: %p\n", hsp.hsp_next); - mdb_printf("hsp_link:\n"); - mdb_inc_indent(2); - mdb_printf("ln_next: %p\n", hsp.hsp_link.ln_next); - mdb_printf("ln_setno: %u\n", hsp.hsp_link.ln_setno); - mdb_printf("ln_id: %u\n", hsp.hsp_link.ln_id); - mdb_inc_indent(2); - mdb_printf("--- on disk structures ---\n"); - mdb_printf("hsp_revision: %u\n", hsp.hsp_revision); - mdb_printf("hsp_self_id: %u \n", hsp.hsp_self_id); - mdb_printf("hsp_record_id: %d \n", hsp.hsp_record_id); - mdb_printf("hsp_refcount: %d\n", hsp.hsp_refcount); - mdb_printf("hsp_nhotspares: %d # Number of slices in the pool\n", - hsp.hsp_nhotspares); - mdb_inc_indent(1); - - hs_addr = hsp_addr + ((uintptr_t)&hsp.hsp_hotspares - (uintptr_t)&hsp); - - for (i = 0; i < hsp.hsp_nhotspares; i++) { - if (mdb_vread(&recid, sizeof (int), hs_addr) != - sizeof (int)) { - mdb_warn("failed to read recid at %p\n", hs_addr); - break; - } - mdb_printf("hsp_hotspares[%d]: %d", i, recid); - mdb_printf(" # should match an hs_record_id in s_hs list\n"); - hs_addr += (uintptr_t)sizeof (int); - } - mdb_dec_indent(1); - mdb_printf("--- end of on disk ---\n"); - mdb_dec_indent(2); - mdb_dec_indent(2); - mdb_dec_indent(2); -} - -static void -process_hsp(uintptr_t addr) -{ - hot_spare_pool_t hsp; - - if (mdb_vread(&hsp, sizeof (hot_spare_pool_t), addr) != - sizeof (hot_spare_pool_t)) { - mdb_warn("failed to read hot_spare_pool_t at %p\n", addr); - return; - } - mdb_inc_indent(2); - mdb_printf("%p\n", addr); - printhsp(hsp, addr); - mdb_dec_indent(2); -} -/* - * Dump out the hotspare pools - * usage: ::dumphotspare - */ -int -dumphotspare(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - if (argc != 0) /* ensure no options */ - return (DCMD_USAGE); - - snarf_sets(); - - if (!(flags & DCMD_ADDRSPEC)) { - if (mdb_walk_dcmd("hotsparepool", "dumphotspare", argc, - argv) == -1) { - mdb_warn("failed to walk hotsparepool"); - return (DCMD_ERR); - } - return (DCMD_OK); - } - - process_hsp(addr); - - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/dumpmirror.c b/usr/src/cmd/mdb/common/modules/md/dumpmirror.c deleted file mode 100644 index 53e70438b780..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/dumpmirror.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "mdinclude.h" - -/* - * Display an arbitrary bitmap by showing the set bits in the array. - * Output will be - for ranges or for singleton bits. - */ -static void -print_mm_bm(unsigned char *bm, uint_t size, char *bm_name) -{ - int i; - int first_set = -1; - int need_comma = 0; - - mdb_printf("%s set bits: ", bm_name); - for (i = 0; i < size; i++) { - if (isset(bm, i)) { - if (first_set == -1) { - first_set = i; - } - } else { - if (first_set != -1) { - if (first_set != (i-1)) { - mdb_printf("%s%u-%u", - (need_comma ? "," : ""), - first_set, (i-1)); - } else { - mdb_printf("%s%u", - (need_comma ? "," : ""), first_set); - } - need_comma = 1; - first_set = -1; - } - } - } - if (first_set != -1) { - mdb_printf("%s%u-%u", (need_comma ? "," : ""), first_set, - size-1); - } - mdb_printf("\n"); -} - -/* - * Print uchar_t sized count fields (typically un_pernode_dirty_map entries) - */ - -static void -print_mm_cnt_c(unsigned char *bm, uint_t size, char *bm_name) -{ - int i; - int need_comma = 0; - - mdb_printf("%s set counts: ", bm_name); - for (i = 0; i < size; i++) { - if (bm[i]) { - mdb_printf("%s(%d,%3d)", (need_comma ? "," : ""), i, - (uint_t)bm[i]); - need_comma = 1; - } - } - mdb_printf("\n"); -} - -static void -print_mm_cnt_w(unsigned short *bm, uint_t size, char *bm_name) -{ - int i; - int need_comma = 0; - - mdb_printf("%s set counts: ", bm_name); - for (i = 0; i < size; i++) { - if (bm[i]) { - mdb_printf("%s(%d,%5d)", (need_comma ? "," : ""), i, - (uint_t)bm[i]); - need_comma = 1; - } - } - mdb_printf("\n"); -} - -/* - * Print the associated bitmaps for the specified mm_unit_t - * These are: - * un_pernode_dirty_bm - * un_goingclean_bm - * un_dirty_bm - * un_goingdirty_bm - * un_resync_bm - * - * Associated counts for unit: - * un_pernode_dirty_sum[] (uchar_t) - * un_outstanding_writes[] (ushort_t) - * - */ - -/* ARGSUSED */ -int -printmmbm(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - mm_unit_t mm, *mmp; - unsigned char *rr_dirty_bm, *rr_goingclean_bm, *rr_goingdirty_bm; - unsigned char *rr_resync_bm; - uintptr_t un_dbm, un_gcbm, un_gdbm, un_rrbm, un_pnds, un_ow; - uint_t num_rr, rr_bitmap_size; - int i; - uintptr_t un_pernode_bm; - unsigned char *rr_pernode_dirty, *rr_pnds; - unsigned short *rr_ow; - /* just enough for un_pernode_dirty_bm[] plus three digits */ - char pernode_str[25]; - - if (argc != 0) - return (DCMD_USAGE); - - if (!(flags & DCMD_ADDRSPEC)) { - mdb_warn("No mm_unit_t address specified"); - return (DCMD_ERR); - } - - if (mdb_vread(&mm, sizeof (mm_unit_t), addr) == -1) { - mdb_warn("failed to read mm_unit_t at %p\n", addr); - return (DCMD_ERR); - } - - mmp = &mm; - - num_rr = mm.un_rrd_num; - - un_dbm = (uintptr_t)mmp->un_dirty_bm; - un_gcbm = (uintptr_t)mmp->un_goingclean_bm; - un_gdbm = (uintptr_t)mmp->un_goingdirty_bm; - un_rrbm = (uintptr_t)mmp->un_resync_bm; - un_pnds = (uintptr_t)mmp->un_pernode_dirty_sum; - un_ow = (uintptr_t)mmp->un_outstanding_writes; - - rr_bitmap_size = howmany(num_rr, NBBY); - rr_dirty_bm = (unsigned char *)mdb_alloc(rr_bitmap_size, - UM_SLEEP|UM_GC); - rr_goingclean_bm = (unsigned char *)mdb_alloc(rr_bitmap_size, - UM_SLEEP|UM_GC); - rr_goingdirty_bm = (unsigned char *)mdb_alloc(rr_bitmap_size, - UM_SLEEP|UM_GC); - rr_resync_bm = (unsigned char *)mdb_alloc(rr_bitmap_size, - UM_SLEEP|UM_GC); - rr_pnds = (unsigned char *)mdb_alloc(num_rr, UM_SLEEP|UM_GC); - rr_ow = (unsigned short *)mdb_alloc(num_rr * sizeof (unsigned short), - UM_SLEEP|UM_GC); - - if (mdb_vread(rr_dirty_bm, rr_bitmap_size, un_dbm) == -1) { - mdb_warn("failed to read un_dirty_bm at %p\n", un_dbm); - return (DCMD_ERR); - } - if (mdb_vread(rr_goingclean_bm, rr_bitmap_size, un_gcbm) == -1) { - mdb_warn("failed to read un_goingclean_bm at %p\n", un_gcbm); - return (DCMD_ERR); - } - if (mdb_vread(rr_goingdirty_bm, rr_bitmap_size, un_gdbm) == -1) { - mdb_warn("failed to read un_goingdirty_bm at %p\n", un_gdbm); - return (DCMD_ERR); - } - if (mdb_vread(rr_resync_bm, rr_bitmap_size, un_rrbm) == -1) { - mdb_warn("failed to read un_resync_bm at %p\n", un_rrbm); - return (DCMD_ERR); - } - if (mdb_vread(rr_pnds, num_rr, un_pnds) == -1) { - mdb_warn("failed to read un_pernode_dirty_sum at %p\n", - un_pnds); - return (DCMD_ERR); - } - if (mdb_vread(rr_ow, num_rr * sizeof (unsigned short), un_ow) == -1) { - mdb_warn("failed to read un_outstanding_writes at %p\n", un_ow); - return (DCMD_ERR); - } - - print_mm_bm(rr_dirty_bm, num_rr, "un_dirty_bm"); - print_mm_bm(rr_goingclean_bm, num_rr, "un_goingclean_bm"); - print_mm_bm(rr_goingdirty_bm, num_rr, "un_goingdirty_bm"); - print_mm_bm(rr_resync_bm, num_rr, "un_resync_bm"); - - /* - * Load all the un_pernode_bm[] entries and iterate through the non- - * NULL entries - */ - rr_pernode_dirty = (unsigned char *)mdb_alloc(rr_bitmap_size, - UM_SLEEP|UM_GC); - - for (i = 0; i < 128; i++) { - un_pernode_bm = (uintptr_t)mmp->un_pernode_dirty_bm[i]; - if (un_pernode_bm) { - mdb_snprintf(pernode_str, sizeof (pernode_str), - "un_pernode_dirty_bm[%d]", i); - if (mdb_vread(rr_pernode_dirty, rr_bitmap_size, - un_pernode_bm) == -1) { - mdb_warn("failed to read %s at %p\n", - pernode_str, un_pernode_bm); - return (DCMD_ERR); - } - print_mm_bm(rr_pernode_dirty, num_rr, pernode_str); - } - } - print_mm_cnt_c(rr_pnds, num_rr, "un_pernode_dirty_sum"); - - print_mm_cnt_w(rr_ow, num_rr, "un_outstanding_writes"); - - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/dumpnamespace.c b/usr/src/cmd/mdb/common/modules/md/dumpnamespace.c deleted file mode 100644 index bc859f5cf9af..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/dumpnamespace.c +++ /dev/null @@ -1,488 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" -#include - -/* - * work out the offset size - */ -#define MY_DID_SHR_NAMSIZ(n) \ - (((sizeof (struct did_shr_name) - 1) + \ - n + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) -#define MY_SHR_NAMSIZ(n) \ - (((sizeof (struct nm_shared_name) - 1) + \ - n + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) -#define MY_DID_NAMSIZ(n) \ - (((sizeof (struct did_min_name) - 1) + \ - n + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) -#define MY_NAMSIZ(n) \ - (((sizeof (struct nm_name) - 1) + \ - n + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) - -static uintptr_t -print_did_shared_name(uintptr_t addr, int i) -{ - struct did_shr_name shn; - uintptr_t sn_name_addr; - void *sn_name; - uintptr_t next_addr = addr; - - if (mdb_vread(&shn, sizeof (struct did_shr_name), addr) != - sizeof (struct did_shr_name)) { - mdb_warn("failed to read did_shr_name at %p\n", addr); - return (NULL); - } - if (shn.did_size == 0) - return (NULL); - mdb_printf("device_id[%d] at %p\n", i, addr); - mdb_inc_indent(2); - mdb_printf("did_key: %d\n", shn.did_key); - mdb_printf("did_count: %u\n", shn.did_count); - mdb_printf("did_data: 0x%x \n", shn.did_data); - mdb_printf("did_size: %u\n", shn.did_size); - sn_name_addr = addr + ((uintptr_t)&shn.did_devid - (uintptr_t)&shn); - if (shn.did_size > 0) { - sn_name = mdb_alloc(shn.did_size + 1, UM_SLEEP | UM_GC); - if (mdb_readstr((char *)sn_name, shn.did_size + 1, - sn_name_addr) <= 0) { - mdb_warn("failed to read sn_name at %p\n", - sn_name_addr); - return (NULL); - } - mdb_printf("did_devid: %s at %p\n", (char *)sn_name, - sn_name_addr); - next_addr = addr + MY_DID_SHR_NAMSIZ(shn.did_size); - } - mdb_dec_indent(2); - return (next_addr); -} - -static uintptr_t -print_nm_shared_name(uintptr_t addr, int i) -{ - struct nm_shared_name shn; - uintptr_t sn_name_addr; - void *sn_name; - uintptr_t next_addr = addr; - - if (mdb_vread(&shn, sizeof (struct nm_shared_name), addr) != - sizeof (struct nm_shared_name)) { - mdb_warn("failed to read nm_shared_name at %p\n", addr); - return (NULL); - } - if (shn.sn_namlen == 0) - return (NULL); - mdb_printf("sr_name[%d] at %p\n", i, addr); - mdb_inc_indent(2); - mdb_printf("sn_key: %d \n", shn.sn_key); - mdb_printf("sn_count: %u\n", shn.sn_count); - mdb_printf("sn_data: 0x%x \n", shn.sn_data); - mdb_printf("sn_namlen: %u\n", shn.sn_namlen); - sn_name_addr = addr + ((uintptr_t)&shn.sn_name - (uintptr_t)&shn); - if (shn.sn_namlen > 0) { - sn_name = mdb_alloc(shn.sn_namlen + 1, UM_SLEEP | UM_GC); - if (mdb_readstr((char *)sn_name, shn.sn_namlen + 1, - sn_name_addr) <= 0) { - mdb_warn("failed to read sn_name at %p\n", - sn_name_addr); - } - mdb_printf("sn_name: %s at %p\n", (char *)sn_name, - sn_name_addr); - next_addr = addr + MY_SHR_NAMSIZ(shn.sn_namlen); - } - mdb_dec_indent(2); - return (next_addr); -} - -static uintptr_t -print_devid_name(uintptr_t addr, int i) -{ - struct did_min_name didmn; - uintptr_t did_name_addr; - void *min_name; - uintptr_t next_addr = addr; - - if (mdb_vread(&didmn, sizeof (struct did_min_name), addr) != - sizeof (struct did_min_name)) { - mdb_warn("failed to read did_min_name at %p\n", addr); - return (NULL); - } - if (didmn.min_namlen == 0) - return (NULL); - mdb_printf("minor_name[%d] at %p\n", i, addr); - mdb_inc_indent(2); - mdb_printf("min_key: %d \n", didmn.min_key); - mdb_printf("min_count: %u\n", didmn.min_count); - mdb_printf("min_devid_key: %d \n", didmn.min_devid_key); - mdb_printf("min_namlen: %u\n", didmn.min_namlen); - did_name_addr = addr + ((uintptr_t)&didmn.min_name - (uintptr_t)&didmn); - if (didmn.min_namlen > 0) { - min_name = mdb_alloc(didmn.min_namlen + 1, UM_SLEEP | UM_GC); - if (mdb_readstr((char *)min_name, didmn.min_namlen + 1, - did_name_addr) <= 0) { - mdb_warn("failed to read min_name at %p\n", - did_name_addr); - } - mdb_printf("min_name: %s at %p\n", (char *)min_name, - did_name_addr); - next_addr = addr + MY_DID_NAMSIZ(didmn.min_namlen); - } - mdb_dec_indent(2); - return (next_addr); -} - -static uintptr_t -print_nm_name(uintptr_t addr, int i) -{ - struct nm_name nm; - uintptr_t nm_name_addr; - void *n_name; - uintptr_t next_addr = addr; - - if (mdb_vread(&nm, sizeof (struct nm_name), addr) != - sizeof (struct nm_name)) { - mdb_warn("failed to read nm_name at %p\n", addr); - return (NULL); - } - if (nm.n_namlen == 0) - return (NULL); - mdb_printf("r_name[%d] at %p\n", i, addr); - mdb_inc_indent(2); - mdb_printf("n_key: %d \n", nm.n_key); - mdb_printf("n_count: %u\n", nm.n_count); - mdb_printf("n_minor: %x\n", nm.n_minor); - mdb_printf("n_drv_key: %d \n", nm.n_drv_key); - mdb_printf("n_dir_key: %d \n", nm.n_dir_key); - mdb_printf("n_namlen: %u\n", nm.n_namlen); - nm_name_addr = addr + ((uintptr_t)&nm.n_name - (uintptr_t)&nm); - if (nm.n_namlen > 0) { - n_name = mdb_alloc(nm.n_namlen + 1, UM_SLEEP | UM_GC); - if (mdb_readstr((char *)n_name, nm.n_namlen + 1, - nm_name_addr) <= 0) { - mdb_warn("failed to read n_name at %p\n", nm_name_addr); - } - mdb_printf("n_name: %s at %p\n", (char *)n_name, - nm_name_addr); - next_addr = addr + MY_NAMSIZ(nm.n_namlen); - } - - mdb_dec_indent(2); - return (next_addr); -} - -static uint_t -process_nmn_record_hdr(uintptr_t addr) -{ - struct nm_rec_hdr rhdr; - - /* - * we read this anyway as the first part of nm_rec, devid_min_rec, - * nm_shr_rec, and devid_shr_rec record is a nm_rec_hdr - */ - if (mdb_vread(&rhdr, sizeof (struct nm_rec_hdr), addr) != - sizeof (struct nm_rec_hdr)) { - mdb_warn("failed to read nm_rec_hdr at %p\n", addr); - return (0); - } - - mdb_printf("nmn_record: %p\n", addr); - mdb_inc_indent(2); - mdb_printf("r_revision: %4u\n", rhdr.r_revision); - mdb_printf("r_alloc_size: %4u\n", rhdr.r_alloc_size); - mdb_printf("r_used_size: %4u\n", rhdr.r_used_size); - mdb_printf("r_next_recid: %4x\n", rhdr.r_next_recid); - mdb_printf("xr_next_rec: %4u\n", rhdr.xr_next_rec); - mdb_printf("r_next_key: %4d\n", rhdr.r_next_key); - mdb_dec_indent(2); - return (rhdr.r_used_size); -} - -static void -process_nmn_record(uintptr_t addr, int shared, int devid) -{ - struct nm_shr_rec srhdr; - struct devid_shr_rec didsrhdr; - struct nm_rec nm_record; - struct devid_min_rec devid_record; - uintptr_t shn_addr; - int i; - uintptr_t next_addr, start_addr; - uint_t used_size; - - used_size = process_nmn_record_hdr(addr); - - if (devid) { - if (shared) { - if (mdb_vread(&didsrhdr, sizeof (struct devid_shr_rec), - addr) != sizeof (struct devid_shr_rec)) { - mdb_warn("failed to read devid_shr_rec at %p\n", - addr); - return; - } - } else { - if (mdb_vread(&devid_record, - sizeof (struct devid_min_rec), addr) - != sizeof (struct devid_min_rec)) { - mdb_warn("failed to read devid_min_rec at %p\n", - addr); - return; - } - } - } else { - if (shared) { - if (mdb_vread(&srhdr, sizeof (struct nm_shr_rec), addr) - != sizeof (struct nm_shr_rec)) { - mdb_warn("failed to read nm_shr_rec at %p\n", - addr); - return; - } - } else { - if (mdb_vread(&nm_record, sizeof (struct nm_rec), addr) - != sizeof (struct nm_rec)) { - mdb_warn("failed to read nm_rec at %p\n", addr); - return; - } - } - } - mdb_inc_indent(2); - if (devid) { - if (shared) { - /* - * Do the rest of the device_id records. - */ - next_addr = addr + ((uintptr_t)&didsrhdr.device_id[0] - - (uintptr_t)&didsrhdr); - start_addr = next_addr; - for (i = 0; ; i++) { - shn_addr = next_addr; - next_addr = print_did_shared_name(shn_addr, i); - if (next_addr == NULL) { - mdb_dec_indent(2); - return; - } - /* - * Causes us to print one extra record. - */ - if ((next_addr - start_addr > used_size) || - (next_addr == shn_addr)) { - break; - } - } - } else { - /* - * Now do the rest of the record. - */ - next_addr = addr + - ((uintptr_t)&devid_record.minor_name[0] - - (uintptr_t)&devid_record); - start_addr = next_addr; - for (i = 0; ; i++) { - shn_addr = next_addr; - next_addr = print_devid_name(shn_addr, i); - if (next_addr == NULL) { - mdb_dec_indent(2); - return; - } - if ((next_addr - start_addr > used_size) || - (next_addr == shn_addr)) { - break; - } - } - } - } else { - if (shared) { - /* - * Now do the rest of the sr_name records. - */ - next_addr = addr + ((uintptr_t)&srhdr.sr_name[0] - - (uintptr_t)&srhdr); - start_addr = next_addr; - for (i = 0; ; i++) { - shn_addr = next_addr; - next_addr = print_nm_shared_name(shn_addr, i); - if (next_addr == NULL) { - mdb_dec_indent(2); - return; - } - /* - * Causes us to print one extra record - */ - if ((next_addr - start_addr > used_size) || - (next_addr == shn_addr)) { - break; - } - } - } else { - /* - * Now do the rest of the record - */ - next_addr = addr + ((uintptr_t)&nm_record.r_name[0] - - (uintptr_t)&nm_record); - start_addr = next_addr; - for (i = 0; ; i++) { - shn_addr = next_addr; - next_addr = print_nm_name(shn_addr, i); - if (next_addr == NULL) { - mdb_dec_indent(2); - return; - } - if ((next_addr - start_addr > used_size) || - (next_addr == shn_addr)) { - break; - } - } - } - } - mdb_dec_indent(2); -} - -static void -process_nm_next_hdr(uintptr_t addr, int shared, int devid) -{ - uintptr_t next = addr; - struct nm_next_hdr nhdr; - - mdb_inc_indent(2); - mdb_printf("%p\n", next); - if (mdb_vread(&nhdr, sizeof (struct nm_next_hdr), next) != - sizeof (struct nm_next_hdr)) { - mdb_warn("failed to read nm_next_hdr at %p", next); - return; - } - (void) process_nmn_record_hdr((uintptr_t)nhdr.nmn_record); - next = (uintptr_t)nhdr.nmn_nextp; - while (next != (uintptr_t)0) { - - mdb_printf("\n"); - mdb_printf("nmn_nextp %p\n", nhdr.nmn_nextp); - if (mdb_vread(&nhdr, sizeof (struct nm_next_hdr), next) != - sizeof (struct nm_next_hdr)) { - mdb_warn("failed to read nm_next_hdr at %p\n", next); - break; - } - process_nmn_record((uintptr_t)nhdr.nmn_record, shared, devid); - next = (uintptr_t)nhdr.nmn_nextp; - } - mdb_printf("\n"); - mdb_dec_indent(2); -} -/* - * Start the processing of a nominated set - */ -static void -process_set(int setno) -{ - uintptr_t addr = (uintptr_t)mdset[setno].s_nm; - uintptr_t did_addr = (uintptr_t)mdset[setno].s_did_nm; - uintptr_t shared_addr, names_addr; - uintptr_t did_names_addr, did_shared_addr; - struct nm_header_hdr hdr, did_hdr; - - mdb_printf("------ Name Space for setno %d ------\n", setno); - - if (mdb_vread(&hdr, sizeof (struct nm_header_hdr), addr) != - sizeof (struct nm_header_hdr)) { - mdb_warn("failed to read nm_header_hdr at %p\n", addr); - return; - } - mdb_printf("hh_header: %p \n", hdr.hh_header); - if (did_addr != NULL) { /* device id's exist */ - if (mdb_vread(&did_hdr, sizeof (struct nm_header_hdr), - did_addr) != sizeof (struct nm_header_hdr)) { - mdb_warn("failed to read nm_header_hdr at %p\n", - did_addr); - return; - } - mdb_printf("did hh_header: %p \n", did_hdr.hh_header); - did_names_addr = - (uintptr_t)&(((struct nm_header_hdr *)did_addr)->hh_names); - did_shared_addr = - (uintptr_t)&(((struct nm_header_hdr *)did_addr)->hh_shared); - } - - names_addr = (uintptr_t)&(((struct nm_header_hdr *)addr)->hh_names); - shared_addr = (uintptr_t)&(((struct nm_header_hdr *)addr)->hh_shared); - mdb_printf("hh_names: %p \n", names_addr); - mdb_printf("hh_shared: %p\n", shared_addr); - - if (did_addr != NULL) { - mdb_printf("did hh_names: %p \n", did_names_addr); - mdb_printf("did hh_shared: %p\n", did_shared_addr); - } - - mdb_printf("hh_names:"); - process_nm_next_hdr(names_addr, 0, 0); - mdb_printf("\nhh_shared:"); - process_nm_next_hdr(shared_addr, 1, 0); - - if (did_addr != NULL) { - mdb_printf("did hh_names:"); - process_nm_next_hdr(did_names_addr, 0, 1); - mdb_printf("\ndid hh_shared:"); - process_nm_next_hdr(did_shared_addr, 1, 1); - } -} -/* - * Dump the name space for all sets or specified set (-s option) - * usage: ::dumpnamespace [-s setname] - */ -/* ARGSUSED */ -int -dumpnamespace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - char *s_opt = NULL; - int j; - int setno; - - if (mdb_getopts(argc, argv, 's', MDB_OPT_STR, &s_opt, - NULL) != argc) { - /* left over arguments ?? */ - return (DCMD_USAGE); - } - - snarf_sets(); - - if (argc == 0) { - for (j = 0; j < md_nsets; j++) { - if (mdset[j].s_status & MD_SET_NM_LOADED) { - process_set(j); - } - } - } else { - setno = findset(s_opt); - if (setno == -1) { - mdb_warn("no such set: %s\n", s_opt); - return (DCMD_ERR); - } - if (mdset[setno].s_status & MD_SET_NM_LOADED) { - process_set(setno); - } - } - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/findset.c b/usr/src/cmd/mdb/common/modules/md/findset.c deleted file mode 100644 index 16a8bc6c90ed..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/findset.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * Function: findset - * Purpose: Return the setno of a set given the name of the set. - * Returns: - * setno - the number of the set - * -1 - could not find the named set - */ -int -findset(char *setn) -{ - int i; - char setname[1024]; - - if (setn == NULL) { - return (-1); - } - - for (i = 0; i < md_nsets; i++) { - if (set_dbs[i].s_setname == 0) { - continue; - } - if (mdb_vread(&setname, 1024, - (uintptr_t)set_dbs[i].s_setname) == -1) { - mdb_warn("failed to read setname at %s\n", - set_dbs[i].s_setname); - } - if (strcmp(setname, setn) == 0) { - return (i); - } - } - return (-1); -} diff --git a/usr/src/cmd/mdb/common/modules/md/md.c b/usr/src/cmd/mdb/common/modules/md/md.c deleted file mode 100644 index 371dfa14f3ff..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/md.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include - - -int md_verbose = 0; /* be verbose about the addresses */ - -extern int metaset(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int metastat(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int set_io(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int dumpnamespace(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int dumpsetaddr(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int dumphotspare(uintptr_t, uint_t, int, const mdb_arg_t *); -extern int printmmbm(uintptr_t, uint_t, int, const mdb_arg_t *); -extern void set_io_help(); - -/* from mdbgen */ -extern int mddb_db_walk_init(mdb_walk_state_t *); -extern int mddb_db_walk_step(mdb_walk_state_t *); -extern int mddb_de_ic_walk_init(mdb_walk_state_t *); -extern int mddb_de_ic_walk_step(mdb_walk_state_t *); -extern int hotsparepool_walk_init(mdb_walk_state_t *); -extern int hotsparepool_walk_step(mdb_walk_state_t *); -extern void hotsparepool_walk_fini(mdb_walk_state_t *); -extern int didnamespace_walk_init(mdb_walk_state_t *); -extern int didnamespace_walk_step(mdb_walk_state_t *); -extern void didnamespace_walk_fini(mdb_walk_state_t *); -extern int namespace_walk_init(mdb_walk_state_t *); -extern int namespace_walk_step(mdb_walk_state_t *); -extern void namespace_walk_fini(mdb_walk_state_t *); -extern int sets_walk_init(mdb_walk_state_t *); -extern int sets_walk_step(mdb_walk_state_t *); -extern void sets_walk_fini(mdb_walk_state_t *); -extern int units_walk_init(mdb_walk_state_t *); -extern int units_walk_step(mdb_walk_state_t *); -extern void units_walk_fini(mdb_walk_state_t *); -extern int simple_de_ic(uintptr_t, uint_t, int, const mdb_arg_t *); -int md_set_verbose(uintptr_t, uint_t, int, const mdb_arg_t *); - - -const mdb_dcmd_t dcmds[] = { - { "md_verbose", NULL, "toggle verbose mode for SVM dcmds", - md_set_verbose }, - { "metaset", NULL, "list SVM metasets", metaset }, - { "metastat", "[-v]", "list SVM metadevices", - metastat }, - { "set_io", NULL, "show the pending IO counts", set_io, - set_io_help }, - { "dumpnamespace", "[-s setname]", "dump the SVM name space", - dumpnamespace }, - { "dumphotspare", NULL, "dump the hot spare pools", - dumphotspare }, - { "dumpsetaddr", "[-s setname]", "dump the SVM set addresses", - dumpsetaddr }, - { "simple_de_ic", NULL, "simple mddb_de_ic_t", - simple_de_ic }, - { "printmmbm", NULL, "print bitmaps for given mm_unit_t", - printmmbm }, - { NULL } -}; - -static const mdb_walker_t walkers[] = { - { "mddb_db", "walk list of mddb_db_t structures", - mddb_db_walk_init, mddb_db_walk_step, NULL, NULL }, - { "mddb_de_ic", "walk list of mddb_de_t structures", - mddb_de_ic_walk_init, mddb_de_ic_walk_step, NULL, NULL }, - { "hotsparepool", "walk list of hotspare pools", - hotsparepool_walk_init, hotsparepool_walk_step, - hotsparepool_walk_fini, NULL }, - { "didnamespace", "walk the did namespace", - didnamespace_walk_init, didnamespace_walk_step, - didnamespace_walk_fini, NULL }, - { "namespace", "walk the namespace", - namespace_walk_init, namespace_walk_step, namespace_walk_fini, - NULL }, - { "md_sets", "walk list of sets", - sets_walk_init, sets_walk_step, sets_walk_fini, NULL }, - { "md_units", "walk list of unit structures", - units_walk_init, units_walk_step, units_walk_fini, NULL }, - { NULL } -}; - -static const mdb_modinfo_t modinfo = { - MDB_API_VERSION, dcmds, walkers -}; - -const mdb_modinfo_t * -_mdb_init(void) -{ - return (&modinfo); -} - - -/* ARGSUSED */ -int -md_set_verbose(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - md_verbose = !md_verbose; - - if ((flags & DCMD_ADDRSPEC) != 0 || argc != 0) - return (DCMD_USAGE); - - mdb_printf("Verbose mode is now %d\n", md_verbose); - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/mdinclude.h b/usr/src/cmd/mdb/common/modules/md/mdinclude.h deleted file mode 100644 index e0fa59d192cf..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/mdinclude.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _MDINCLUDE_H -#define _MDINCLUDE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* these are defined in snarf.c and md.c */ -extern md_set_t mdset[MD_MAXSETS]; -extern set_t md_nsets; -extern unit_t md_nunits; -extern int md_verbose; -extern mddb_set_t set_dbs[MD_MAXSETS]; - -extern int snarf_sets(void); -extern int findset(char *); - -#ifdef __cplusplus -} -#endif - -#endif /* _MDINCLUDE_H */ diff --git a/usr/src/cmd/mdb/common/modules/md/metaset.c b/usr/src/cmd/mdb/common/modules/md/metaset.c deleted file mode 100644 index 65511492306d..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/metaset.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -mddb_set_t set_db; - -/* print out the correct set */ -int -print_set(uintptr_t addr) -{ - char machine[1024]; - - if (mdb_vread(&set_db, sizeof (mddb_set_t), addr) == -1) { - if (addr != NULL) { - mdb_warn("failed to read mddb_set_t at 0x%p\n", addr); - return (DCMD_ERR); - } else { - return (DCMD_OK); - } - } - - if (set_db.s_setname != 0) { - if (mdb_readstr(machine, 1024, - (uintptr_t)set_db.s_setname) == -1) { - mdb_warn("failed to read setname at 0x%p\n", - set_db.s_setname); - } else { - mdb_printf("Setname: %s Setno: %u\t%p\n", - machine, set_db.s_setno, addr); - } - } else { - mdb_printf("Setname: NULL Setno: %u\t%p\n", - set_db.s_setno, addr); - } - - mdb_inc_indent(2); - mdb_printf("s_un = %p\n", mdset[set_db.s_setno].s_un); - mdb_printf("s_hsp = %p\n", mdset[set_db.s_setno].s_hsp); - mdb_dec_indent(2); - return (DCMD_OK); -} - -/* - * print all sets or the specified set with -s option - * usage: ::metaset - */ -/* ARGSUSED */ -int -metaset(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - snarf_sets(); - - if (!(flags & DCMD_ADDRSPEC)) { - if (mdb_walk_dcmd("md_sets", "metaset", argc, - argv) == -1) { - mdb_warn("failed to walk sets"); - return (DCMD_ERR); - } - return (DCMD_OK); - } - print_set(addr); - - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/metastat.c b/usr/src/cmd/mdb/common/modules/md/metastat.c deleted file mode 100644 index d3f27ec23381..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/metastat.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include "mdinclude.h" - -typedef struct submirror_cb { - minor_t un_self_id; - int un_nsm; - ushort_t mm_un_nsm; -}submirror_cb_t; - -void -print_setname(int setno) -{ - char setname[1024]; - - if (setno != 0) { - if (mdb_readstr(setname, 1024, - (uintptr_t)set_dbs[setno].s_setname) == -1) { - mdb_warn("failed to read setname at 0x%p\n", - set_dbs[setno].s_setname); - } - mdb_printf("%s/", setname); - } -} - -void -print_stripe(void *un_addr, void *mdcptr, uint_t verbose) -{ - ms_unit_t ms; - int setno; - minor_t un_self_id; - md_parent_t un_parent; - diskaddr_t un_total_blocks; - - /* read in the device */ - un_self_id = ((mdc_unit_t *)mdcptr)->un_self_id; - un_parent = ((mdc_unit_t *)mdcptr)->un_parent; - un_total_blocks = ((mdc_unit_t *)mdcptr)->un_total_blocks; - if (mdb_vread(&ms, sizeof (ms_unit_t), - (uintptr_t)un_addr) == -1) { - mdb_warn("failed to read ms_unit_t at %p\n", un_addr); - return; - } - - setno = MD_MIN2SET(un_self_id); - print_setname(setno); - - mdb_printf("d%u: ", MD_MIN2UNIT(un_self_id)); - if (un_parent == ((unit_t)-1)) { - mdb_printf("Concat/Stripe"); - } else { - mdb_printf("Subdevice of d%u", MD_MIN2UNIT(un_parent)); - } - if (verbose) { - mdb_printf("\t< %p::print ms_unit_t >\n", un_addr); - } else { - mdb_printf("\t< %p>\n", un_addr); - } - mdb_inc_indent(2); - mdb_printf("Size: %llu blocks\n", un_total_blocks); - mdb_printf("Rows: %u\n", ms.un_nrows); - mdb_dec_indent(2); -} - -/* ARGSUSED */ -int -print_submirror(uintptr_t addr, void *arg, submirror_cb_t *data) -{ - uintptr_t un_addr; - mdc_unit_t mdc_sm; - - if (mdb_vread(&un_addr, sizeof (void *), addr) == -1) { - mdb_warn("failed to read submirror at %p\n", addr); - return (WALK_ERR); - } - if (un_addr != NULL) { - if (mdb_vread(&mdc_sm, sizeof (mdc_unit_t), un_addr) == -1) { - mdb_warn("failed to read mdc_unit_t at %p", un_addr); - return (WALK_ERR); - } - if (mdc_sm.un_parent == data->un_self_id) { - /* this is one of the sub mirrors */ - mdb_printf("Submirror %u: d%u ", - data->un_nsm, MD_MIN2UNIT(mdc_sm.un_self_id)); - mdb_printf("Size: %llu\n", mdc_sm.un_total_blocks); - data->un_nsm++; - if (data->un_nsm == data->mm_un_nsm) - return (WALK_DONE); - } - } - return (WALK_NEXT); -} - -/* - * Construct an RLE count for the number of 'cleared' bits in the given 'bm' - * Output the RLE count in form: [......] - * RLE is Run Length Encoding, a method for compactly describing a bitmap - * as a series of numbers indicating the count of consecutive set or cleared - * bits. - * - * Input: - * bitmap to scan - * length of bitmap (in bits) - * RLE count array to be updated - * Descriptive text for bitmap RLE count display - */ -static void -print_comp_bm(unsigned char *bm, uint_t size, ushort_t *comp_bm, char *opstr) -{ - int cnt_clean, tot_dirty, cur_idx; - int i, cur_clean, cur_dirty, printit, max_set_cnt, max_reset_cnt; - - cnt_clean = 1; - printit = 0; - cur_clean = 0; - cur_dirty = 0; - cur_idx = 0; - tot_dirty = 0; - max_set_cnt = max_reset_cnt = 0; - for (i = 0; i < size; i++) { - if (isset(bm, i)) { - /* If we're counting clean bits, flush the count out */ - if (cnt_clean) { - cnt_clean = 0; - comp_bm[cur_idx] = cur_clean; - printit = 1; - if (cur_clean > max_reset_cnt) { - max_reset_cnt = cur_clean; - } - } - cur_clean = 0; - cur_dirty++; - tot_dirty++; - } else { - if (!cnt_clean) { - cnt_clean = 1; - comp_bm[cur_idx] = cur_dirty; - printit = 1; - if (cur_dirty > max_set_cnt) { - max_set_cnt = cur_dirty; - } - } - cur_dirty = 0; - cur_clean++; - } - if (printit) { - mdb_printf("%u.", comp_bm[cur_idx++]); - printit = 0; - } - } - - mdb_printf("\nTotal %s bits = %lu\n", opstr, tot_dirty); - mdb_printf("Total %s transactions = %lu\n", opstr, cur_idx); - mdb_printf("Maximum %s set count = %lu, reset count = %lu\n", opstr, - max_set_cnt, max_reset_cnt); -} - -void -print_mirror(void *un_addr, void *mdcptr, uint_t verbose) -{ - mm_unit_t mm, *mmp; - void **ptr; - int setno = 0; - minor_t un_self_id; - diskaddr_t un_total_blocks; - ushort_t mm_un_nsm; - submirror_cb_t data; - uint_t num_rr, rr_blksize; - ushort_t *comp_rr; - unsigned char *rr_dirty_bm, *rr_goingclean_bm; - uintptr_t un_dbm, un_gcbm; - - /* read in the device */ - if (mdb_vread(&mm, sizeof (mm_unit_t), - (uintptr_t)un_addr) == -1) { - mdb_warn("failed to read mm_unit_t at %p\n", un_addr); - return; - } - - mmp = &mm; - - un_self_id = ((mdc_unit_t *)mdcptr)->un_self_id; - un_total_blocks = ((mdc_unit_t *)mdcptr)->un_total_blocks; - mm_un_nsm = mm.un_nsm; - setno = MD_MIN2SET(un_self_id); - print_setname(setno); - - mdb_printf("d%u: Mirror", MD_MIN2UNIT(un_self_id)); - if (verbose) { - mdb_printf("\t< %p::print mm_unit_t >\n", un_addr); - } else { - mdb_printf("\t< %p >\n", un_addr); - } - mdb_inc_indent(2); - mdb_printf("Size: %llu blocks\n", un_total_blocks); - - /* - * Dump out the current un_dirty_bm together with its size - * Also, attempt to Run Length encode the bitmap to see if this - * is a viable option - */ - num_rr = mm.un_rrd_num; - rr_blksize = mm.un_rrd_blksize; - - un_dbm = (uintptr_t)mmp->un_dirty_bm; - un_gcbm = (uintptr_t)mmp->un_goingclean_bm; - - mdb_printf("RR size: %lu bits\n", num_rr); - mdb_printf("RR block size: %lu blocks\n", rr_blksize); - - rr_dirty_bm = (unsigned char *)mdb_alloc(num_rr, UM_SLEEP|UM_GC); - rr_goingclean_bm = (unsigned char *)mdb_alloc(num_rr, UM_SLEEP|UM_GC); - comp_rr = (ushort_t *)mdb_alloc(num_rr * sizeof (ushort_t), - UM_SLEEP|UM_GC); - - if (mdb_vread(rr_dirty_bm, num_rr, un_dbm) == -1) { - mdb_warn("failed to read un_dirty_bm at %p\n", un_dbm); - return; - } - if (mdb_vread(rr_goingclean_bm, num_rr, un_gcbm) == -1) { - mdb_warn("failed to read un_goingclean_bm at %p\n", un_gcbm); - return; - } - - print_comp_bm(rr_dirty_bm, num_rr, comp_rr, "dirty"); - - print_comp_bm(rr_goingclean_bm, num_rr, comp_rr, "clean"); - - /* - * find the sub mirrors, search through each metadevice looking - * at the un_parent. - */ - ptr = mdset[setno].s_un; - - data.un_self_id = un_self_id; - data.un_nsm = 0; - data.mm_un_nsm = mm_un_nsm; - - if (mdb_pwalk("md_units", (mdb_walk_cb_t)print_submirror, &data, - (uintptr_t)ptr) == -1) { - mdb_warn("unable to walk units\n"); - return; - } - - mdb_dec_indent(2); -} - -void -print_raid(void *un_addr, void *mdcptr, uint_t verbose) -{ - mr_unit_t mr; - minor_t un_self_id; - diskaddr_t un_total_blocks; - mdc_unit_t mdc_sc; - void **ptr; - void *addr; - int setno = 0; - int i; - minor_t sc_un_self_id; - md_parent_t sc_parent; - diskaddr_t sc_total_blocks; - - /* read in the device */ - if (mdb_vread(&mr, sizeof (mr_unit_t), (uintptr_t)un_addr) == -1) { - mdb_warn("failed to read mr_unit_t at %p\n", un_addr); - return; - } - un_self_id = ((mdc_unit_t *)mdcptr)->un_self_id; - un_total_blocks = ((mdc_unit_t *)mdcptr)->un_total_blocks; - setno = MD_MIN2SET(un_self_id); - print_setname(setno); - - mdb_printf("d%u: Raid", MD_MIN2UNIT(un_self_id)); - if (verbose) { - mdb_printf("\t< %p ::print mr_unit_t>\n", un_addr); - } else { - mdb_printf("\t< %p >\n", un_addr); - } - mdb_inc_indent(2); - mdb_printf("Size: %llu\n", un_total_blocks); - - /* - * find the sub components if any, search through each metadevice - * looking at the un_parent. - */ - ptr = mdset[setno].s_un; - for (i = 0; i < md_nunits; i++, ptr++) { - if (mdb_vread(&addr, sizeof (void *), (uintptr_t)ptr) == -1) { - mdb_warn("failed to read addr at %p\n", ptr); - continue; - } - if (addr != NULL) { - if (mdb_vread(&mdc_sc, sizeof (mdc_unit_t), - (uintptr_t)addr) == -1) { - mdb_warn("failed to read mdc_unit_t at %p", - un_addr); - continue; - } - sc_parent = mdc_sc.un_parent; - sc_un_self_id = mdc_sc.un_self_id; - sc_total_blocks = mdc_sc.un_total_blocks; - if (sc_parent == un_self_id) { - /* this is one of the sub components */ - mdb_printf("Subdevice %u ", - MD_MIN2UNIT(sc_un_self_id)); - mdb_printf("Size: %llu\n", sc_total_blocks); - } - } - } - mdb_dec_indent(2); -} - -void -print_sp(void *un_addr, void *mdcptr, uint_t verbose) -{ - mp_unit_t mp; - minor_t un_self_id; - diskaddr_t un_total_blocks; - int setno = 0; - uintptr_t extaddr; - int i; - - /* read in the device */ - if (mdb_vread(&mp, sizeof (mp_unit_t), (uintptr_t)un_addr) == -1) { - mdb_warn("failed to read mp_unit_t at %p\n", un_addr); - return; - } - un_self_id = ((mdc_unit_t *)mdcptr)->un_self_id; - un_total_blocks = ((mdc_unit_t *)mdcptr)->un_total_blocks; - setno = MD_MIN2SET(un_self_id); - print_setname(setno); - - mdb_printf("d%u: Soft Partition", MD_MIN2UNIT(un_self_id)); - if (verbose) { - mdb_printf("\t< %p ::print mp_unit_t >\n", un_addr); - } else { - mdb_printf("\t< %p >\n", un_addr); - } - mdb_inc_indent(2); - mdb_printf("Size: %llu\n", un_total_blocks); - mdb_inc_indent(2); - mdb_printf("Extent\tStart Block\tBlock count\n"); - extaddr = (uintptr_t)un_addr + sizeof (mp_unit_t) - sizeof (mp_ext_t); - for (i = 0; i < mp.un_numexts; i++) { - mp_ext_t mpext; - - if (mdb_vread(&mpext, sizeof (mp_ext_t), extaddr) == -1) { - mdb_warn("failed to read mp_ext_t at %p\n", extaddr); - return; - } - mdb_printf(" %d \t %llu\t %llu\n", - i, mpext.un_poff, mpext.un_len); - extaddr += sizeof (mp_ext_t); - } - mdb_dec_indent(2); - mdb_dec_indent(2); - -} - -void -print_trans(void *un_addr, void *mdcptr, uint_t verbose) -{ - mt_unit_t mt; - minor_t un_self_id; - int setno = 0; - - /* read in the device */ - if (mdb_vread(&mt, sizeof (mt_unit_t), (uintptr_t)un_addr) == -1) { - mdb_warn("failed to read mt_unit_t at %p\n", un_addr); - return; - } - un_self_id = ((mdc_unit32_od_t *)mdcptr)->un_self_id; - setno = MD_MIN2SET(un_self_id); - print_setname(setno); - - mdb_printf("d%u: Trans", MD_MIN2UNIT(un_self_id)); - if (verbose) { - mdb_printf("\t< %p ::print mt_unit_t>\n", un_addr); - } else { - mdb_printf("\t< %p >\n", un_addr); - } - -} - -void -print_device(void *un_addr, void *mdcptr, uint_t verbose) -{ - u_longlong_t un_type; - - un_type = ((mdc_unit_t *)mdcptr)->un_type; - - switch (un_type) { - case MD_DEVICE: /* stripe/concat */ - print_stripe(un_addr, mdcptr, verbose); - break; - case MD_METAMIRROR: - print_mirror(un_addr, mdcptr, verbose); - break; - case MD_METATRANS: - print_trans(un_addr, mdcptr, verbose); - break; - case MD_METARAID: - print_raid(un_addr, mdcptr, verbose); - break; - case MD_METASP: - print_sp(un_addr, mdcptr, verbose); - break; - case MD_UNDEFINED: - mdb_warn("undefined metadevice at %p\n", un_addr); - break; - default: - mdb_warn("invalid metadevice at %p\n", un_addr); - break; - } -} - -/* ARGSUSED */ -/* - * usage: ::metastat [-v] - */ -int -metastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - mdc_unit_t mdc; - uintptr_t un_addr; - uint_t verbose = FALSE; - - snarf_sets(); - - if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) - != argc) { - return (DCMD_USAGE); - } - - if (!(flags & DCMD_ADDRSPEC)) { - if (mdb_walk_dcmd("md_units", "metastat", argc, - argv) == -1) { - mdb_warn("failed to walk units"); - return (DCMD_ERR); - } - return (DCMD_OK); - } - if (!(flags & DCMD_LOOP)) { - /* user passed set addr */ - if (mdb_pwalk_dcmd("md_units", "metastat", argc, - argv, addr) == -1) { - mdb_warn("failed to walk units"); - return (DCMD_ERR); - } - return (DCMD_OK); - } - - if (mdb_vread(&un_addr, sizeof (void *), addr) == -1) { - mdb_warn("failed to read un_addr at %p", addr); - return (DCMD_ERR); - } - - if (un_addr != NULL) { - if (mdb_vread(&mdc, sizeof (mdc_unit_t), un_addr) == -1) { - mdb_warn("failed to read mdc_unit_t at %p", un_addr); - return (DCMD_ERR); - } - print_device((void *)un_addr, (void *)&mdc, verbose); - mdb_dec_indent(2); - } - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/set_io_cnt.c b/usr/src/cmd/mdb/common/modules/md/set_io_cnt.c deleted file mode 100644 index 2b375b950327..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/set_io_cnt.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -extern int active_sets; -md_set_io_t md_setio[MD_MAXSETS]; - -/* IO array status: io_state */ -static const mdb_bitmask_t io_state_bits[] = { - { "MD_SET_ACTIVE", MD_SET_ACTIVE, MD_SET_ACTIVE }, - { "MD_SET_RELEASE", MD_SET_RELEASE, MD_SET_RELEASE }, - { NULL, 0, 0 } -}; - - -void -set_io_help(void) -{ - mdb_printf("::set_io [-s name] [-a num] [-m num]\n"); - mdb_printf("-a num - print out num elements in the md_set_io array\n"); - mdb_printf("-s name - print out the information for set named name\n"); - mdb_printf("-m num - only print out element num\n"); -} - -/* ARGSUSED */ -int -set_io(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - GElf_Sym setiosym; - uint64_t i; - size_t offset = 0; - uint64_t opt_a = 0; /* up to active_sets */ - char *opt_s = (char *)NULL; /* a named set */ - uint64_t opt_m = 0; /* array element */ - int setno = 0; - int argnum = 0; - - argnum = mdb_getopts(argc, argv, - 'a', MDB_OPT_UINT64, &opt_a, - 's', MDB_OPT_STR, &opt_s, - 'm', MDB_OPT_UINT64, &opt_m, NULL); - - if (argnum != argc) { - mdb_printf("invalid arguments\n"); - return (DCMD_USAGE); - } - - if ((opt_s != 0) && (opt_m != 0)) { - mdb_printf("-s and -m cannot both be specified\n"); - return (DCMD_USAGE); - } - - snarf_sets(); - - if (opt_a == 0) - opt_a = active_sets; - - /* find the array */ - if (mdb_lookup_by_name("md_set_io", &setiosym) == -1) { - mdb_warn("SVM - no set io counts set\n"); - return (DCMD_ERR); - } - - if (md_verbose) { - mdb_printf("Base address for the md_set_io array: %p\n", - setiosym.st_value); - } - if (opt_s != NULL) { - setno = findset(opt_s); - if (setno == -1) { - mdb_warn("no such set: %s\n", opt_s); - return (DCMD_ERR); - } - opt_m = setno; - } - - if (opt_m > 0) { - mdb_printf("%lld]\t%ld\t%ld", opt_m, - md_setio[opt_m].io_cnt, md_setio[opt_m].io_state); - mdb_printf("\t%hb\n", io_state_bits); - return (DCMD_OK); - } - - if (opt_a == 0) { - mdb_warn("No active set!\n"); - return (DCMD_ERR); - } - - for (i = 0; i < opt_a; i++) { - if (mdb_vread(&md_setio[i], sizeof (md_set_io_t), - setiosym.st_value + offset) == -1) { - mdb_warn("failed to read md_set_io_t at 0x%x\n", - setiosym.st_value + offset); - } - mdb_printf("%lld]\t%ld\t%ld", i, md_setio[i].io_cnt, - md_setio[i].io_state); - mdb_printf("\t%hb", io_state_bits); - if (md_verbose) { - mdb_printf(" - io_cnt: %p", - setiosym.st_value + offset + sizeof (kmutex_t) + - sizeof (kcondvar_t)); - mdb_printf(" %d", sizeof (md_set_io_t)); - } - mdb_printf("\n"); - offset += sizeof (md_set_io_t); - } - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/simple_de_ic.c b/usr/src/cmd/mdb/common/modules/md/simple_de_ic.c deleted file mode 100644 index a157ec16b191..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/simple_de_ic.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* ARGSUSED */ -int -simple_de_ic(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - mddb_de_ic_t value; - char *s = "addr+"; - uint_t noaddr = 0; - - if (!(flags & DCMD_ADDRSPEC)) { - noaddr = 1; - } else { - if (mdb_vread(&value, sizeof (mddb_de_ic_t), addr) != - sizeof (mddb_de_ic_t)) { - mdb_warn("failed to read mddb_de_ic_t at %ll#r\n", - addr); - return (DCMD_ERR); - } - mdb_printf(" at %#lr", addr); - } - - if (noaddr) { - mdb_printf("\n\tde_recid%20s%-25#r\n", - s, (uintptr_t)&value.de_recid - (uintptr_t)&value); - } else { - mdb_printf("\n\tde_recid: %28#r\n", value.de_recid); - } - - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/snarf.c b/usr/src/cmd/mdb/common/modules/md/snarf.c deleted file mode 100644 index b8e401694af0..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/snarf.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* array of the sets */ -md_set_t mdset[MD_MAXSETS]; -mddb_set_t set_dbs[MD_MAXSETS]; -/* for the addresses of each set above */ -uintptr_t mdset_addrs[MD_MAXSETS]; - -unit_t md_nunits = 0; -set_t md_nsets = 0; -int snarfed = 0; -int active_sets = 0; - -/* - * routines to snarf the metaset information - * - * usage: ::dumpsetaddr [-s setname] - */ -/* ARGSUSED */ -int -dumpsetaddr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - int i; - int setno; - char *s_opt = (char *)NULL; - - if (mdb_getopts(argc, argv, 's', MDB_OPT_STR, &s_opt, - NULL) != argc) { - /* left over arguments ?? */ - return (DCMD_USAGE); - } - - if (!snarfed) { - mdb_warn("No sets read in yet - try ::metaset\n"); - return (DCMD_ERR); - } - if (argc == 0) { /* dump all sets */ - for (i = 0; i < md_nsets; i++) { - if (mdset_addrs[i] != (uintptr_t)0) - mdb_printf("%d %p\n", i, mdset_addrs[i]); - } - } else { - setno = findset(s_opt); - if (setno == -1) { - mdb_warn("no such set: %s\n", s_opt); - return (DCMD_ERR); - } - if (mdset_addrs[setno] != (uintptr_t)0) - mdb_printf("%d %p\n", setno, - mdset_addrs[setno]); - } - return (DCMD_OK); -} - - -/* - * Function: snarf_ui_anchor - * Purpose: to read in the s_ui part of a metaset. - * Returns: - number of configured metadevices - * -1 - not configured - */ -int -snarf_ui_anchor(int i) -{ - int j; - int num_found = 0; - void **ptr = mdset[i].s_ui; - void *addr; - - for (j = 0; j < md_nunits; j++) { - if (mdb_vread(&addr, sizeof (void *), (uintptr_t)ptr) == -1) { - ptr++; - continue; - } - if (addr != NULL) { - num_found++; - } - ptr++; - } - return (num_found); -} - -/* - * Function: snarf_sets - * Purpose: Entry point into the module that reads the kernel's version - * of the SVM configuration. - * First of all populates the mdset array and then for each - * component that makes up an "md_set_t" reads it in, via calls - * to other functions. - */ -int -snarf_sets(void) -{ - GElf_Sym setsym; - GElf_Sym nmdsym; - GElf_Sym mdsetsym; - int i; - size_t offset = 0; - - if (snarfed) - return (DCMD_OK); - - /* find the SVM hook - md_set */ - if (mdb_lookup_by_name("md_set", &setsym) == -1) { - mdb_warn("SVM is not configured on this machine\n"); - return (DCMD_ERR); - } - /* find out how many metadevices are configured per set */ - if (mdb_lookup_by_name("md_nunits", &nmdsym) == -1) { - mdb_warn("unable to find md_nunits\n"); - return (DCMD_ERR); - } - if (mdb_vread(&md_nunits, sizeof (unit_t), nmdsym.st_value) == -1) { - mdb_warn("failed to read md_nunits at %p\n", nmdsym.st_value); - return (DCMD_ERR); - } - - if (mdb_lookup_by_name("md_nsets", &mdsetsym) == -1) { - mdb_warn("unable to find md_nsets\n"); - return (DCMD_ERR); - } - if (mdb_vread(&md_nsets, sizeof (set_t), mdsetsym.st_value) == -1) { - mdb_warn("failed to read md_nsets at %p\n", mdsetsym.st_value); - return (DCMD_ERR); - } - - if (md_verbose) { - mdb_printf("mdset array addr: 0x%lx size is: 0x%lx\n", - (uintptr_t)setsym.st_value, sizeof (md_set_t)); - } - - offset = setsym.st_value; - - for (i = 0; i < md_nsets; i++) { - if (mdb_vread(&mdset[i], sizeof (md_set_t), offset) == -1) { - mdb_warn("failed to read md_set_t at 0x%lx\n", - (uintptr_t)(setsym.st_value + offset)); - } - /* Should check the status flags */ - if (mdset[i].s_status & MD_SET_NM_LOADED) { - if (md_verbose) - mdb_printf("Set %d (0x%lx) has a name space\n", - i, (uintptr_t)(setsym.st_value + offset)); - } else { - offset += sizeof (md_set_t); - continue; - } - - if (mdb_vread(&set_dbs[i], sizeof (mddb_set_t), - (uintptr_t)mdset[i].s_db) == -1) { - if (mdset[i].s_db != 0) { - mdb_warn("failed to read mddb_set_t at 0x%p\n", - mdset[i].s_db); - return (DCMD_ERR); - } else { - mdb_warn("%d - no set configured\n", i); - return (DCMD_ERR); - } - } - active_sets++; - - mdset_addrs[i] = (uintptr_t)(offset); - - (void) snarf_ui_anchor(i); - - /* have the set now read in the various bits and pieces */ - offset += sizeof (md_set_t); - } - snarfed = 1; - - if (md_verbose) { - mdb_printf("Number of active sets: %d\n", active_sets); - mdb_printf("Max number of metadevices: %u\n", md_nunits); - mdb_printf("Max number of sets: %u\n", md_nsets); - } - return (DCMD_OK); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_didnm.c b/usr/src/cmd/mdb/common/modules/md/walk_didnm.c deleted file mode 100644 index 4205d8d748bf..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_didnm.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * walk the device id namespace - */ -int -didnamespace_walk_init(mdb_walk_state_t *wsp) -{ - uintptr_t addr; - - snarf_sets(); - wsp->walk_data = mdb_alloc(sizeof (int), UM_SLEEP); - /* walk_data will hold the set number of the set being printed */ - *((int *)wsp->walk_data) = 0; - addr = (uintptr_t)mdset[0].s_did_nm; - - wsp->walk_addr = addr; - return (WALK_NEXT); -} - -int -didnamespace_walk_step(mdb_walk_state_t *wsp) -{ - int status; - struct nm_header_hdr hdr; - - if (wsp->walk_addr == NULL) { - if (*((int *)wsp->walk_data) < md_nsets) { - *((int *)wsp->walk_data) += 1; - wsp->walk_addr = - (uintptr_t)mdset[*((int *)wsp->walk_data)].s_did_nm; - if (wsp->walk_addr == NULL) - return (WALK_NEXT); - } else { - return (WALK_DONE); - } - } - - mdb_printf("DID Namespace for set number %d\n", - *((int *)wsp->walk_data)); - if (mdb_vread(&hdr, sizeof (struct nm_header_hdr), wsp->walk_addr) != - sizeof (struct nm_header_hdr)) { - mdb_warn("failed to read nm_header_hdr at %p", - wsp->walk_addr); - return (WALK_DONE); - } - - - - status = wsp->walk_callback(wsp->walk_addr, (&hdr)->hh_header, - wsp->walk_cbdata); - - *((int *)wsp->walk_data) += 1; - wsp->walk_addr = (uintptr_t)mdset[*((int *)wsp->walk_data)].s_did_nm; - return (status); -} - -void -didnamespace_walk_fini(mdb_walk_state_t *wsp) -{ - mdb_free(wsp->walk_data, sizeof (int)); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_directory_block.c b/usr/src/cmd/mdb/common/modules/md/walk_directory_block.c deleted file mode 100644 index 64b016c8257f..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_directory_block.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * Generic walker svm mddb directory block walker. - */ -int -mddb_db_walk_init(mdb_walk_state_t *wsp) -{ - - /* Must have a start addr. */ - if (wsp->walk_addr == NULL) { - mdb_warn("start address required\n"); - return (WALK_ERR); - } - return (WALK_NEXT); -} - - -/* - * svm mddb directory block walker step routine. - */ -int -mddb_db_walk_step(mdb_walk_state_t *wsp) -{ - mddb_db_t db_entry; - int status; - - if (wsp->walk_addr == NULL) - return (WALK_DONE); - - if (mdb_vread(&db_entry, sizeof (mddb_db_t), wsp->walk_addr) == -1) { - mdb_warn("failed to read mddb_db_t at %p", - wsp->walk_addr); - return (WALK_ERR); - } - - status = wsp->walk_callback(wsp->walk_addr, (&db_entry)->db_next, - wsp->walk_cbdata); - - wsp->walk_addr = (uintptr_t)db_entry.db_next; - - return (status); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_directory_entry.c b/usr/src/cmd/mdb/common/modules/md/walk_directory_entry.c deleted file mode 100644 index aa75cb2d1252..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_directory_entry.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * Generic walker svm mddb directory entry walker. - */ -int -mddb_de_ic_walk_init(mdb_walk_state_t *wsp) -{ - - /* Must have a start addr. */ - if (wsp->walk_addr == NULL) { - mdb_warn("start address required\n"); - return (WALK_ERR); - } - return (WALK_NEXT); -} - - -/* - * svm mddb directory entry walker step routine. - */ -int -mddb_de_ic_walk_step(mdb_walk_state_t *wsp) -{ - mddb_de_ic_t de_entry; - int status; - - /* Check if we're at the last element */ - if (wsp->walk_addr == NULL) - return (WALK_DONE); - - if (mdb_vread(&de_entry, sizeof (mddb_de_ic_t), wsp->walk_addr) == -1) { - mdb_warn("failed to read mddb_de_ic_t at %p", wsp->walk_addr); - return (WALK_ERR); - } - - status = wsp->walk_callback(wsp->walk_addr, &de_entry, - wsp->walk_cbdata); - - wsp->walk_addr = (uintptr_t)de_entry.de_next; - - return (status); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_hsp.c b/usr/src/cmd/mdb/common/modules/md/walk_hsp.c deleted file mode 100644 index 10dfa129a511..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_hsp.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * walk the hotspare pools - */ -/* ARGSUSED */ -int -hotsparepool_walk_init(mdb_walk_state_t *wsp) -{ - uintptr_t addr; - - snarf_sets(); - addr = (uintptr_t)mdset[0].s_hsp; - wsp->walk_data = mdb_alloc(sizeof (int), UM_SLEEP); - /* walk_data hold the number of the set we're walking */ - *((int *)wsp->walk_data) = 0; - mdb_printf("Hotspare Pools for set number 0\n"); - wsp->walk_addr = addr; - return (WALK_NEXT); -} - -int -hotsparepool_walk_step(mdb_walk_state_t *wsp) -{ - int status; - hot_spare_pool_t hsp; - - if (wsp->walk_addr == NULL) { - *((int *)wsp->walk_data) += 1; - if (*((int *)wsp->walk_data) < md_nsets) { - wsp->walk_addr = - (uintptr_t)mdset[*((int *)wsp->walk_data)].s_hsp; - if (wsp->walk_addr == NULL) - return (WALK_NEXT); - mdb_printf("Hotspare Pools for set number %d\n", - *((int *)wsp->walk_data)); - } else { - return (WALK_DONE); - } - } - - if (mdb_vread(&hsp, sizeof (hot_spare_pool_t), wsp->walk_addr) != - sizeof (hot_spare_pool_t)) { - mdb_warn("failed to read hot_spare_pool_t at %p", - wsp->walk_addr); - return (WALK_DONE); - } - - - - status = wsp->walk_callback(wsp->walk_addr, (&hsp)->hsp_next, - wsp->walk_cbdata); - - wsp->walk_addr = (uintptr_t)(&hsp)->hsp_next; - - return (status); -} - -void -hotsparepool_walk_fini(mdb_walk_state_t *wsp) -{ - mdb_free(wsp->walk_data, sizeof (int)); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_nm.c b/usr/src/cmd/mdb/common/modules/md/walk_nm.c deleted file mode 100644 index 404c04f43434..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_nm.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * walk the namespace - */ -/* ARGSUSED */ -int -namespace_walk_init(mdb_walk_state_t *wsp) -{ - uintptr_t addr; - - snarf_sets(); - wsp->walk_data = mdb_alloc(sizeof (int), UM_SLEEP); - /* cycle through sets, start with set 0 */ - *((int *)wsp->walk_data) = 0; - addr = (uintptr_t)mdset[0].s_nm; - - wsp->walk_addr = addr; - return (WALK_NEXT); -} - -int -namespace_walk_step(mdb_walk_state_t *wsp) -{ - int status; - struct nm_header_hdr hdr; - - if (wsp->walk_addr == NULL) { - if (*((int *)wsp->walk_data) < md_nsets) { - *((int *)wsp->walk_data) += 1; - wsp->walk_addr = - (uintptr_t)mdset[*((int *)wsp->walk_data)].s_nm; - if (wsp->walk_addr == NULL) - return (WALK_NEXT); - } else { - return (WALK_DONE); - } - } - - mdb_printf("Namespace for set number %d\n", *((int *)wsp->walk_data)); - if (mdb_vread(&hdr, sizeof (struct nm_header_hdr), wsp->walk_addr) != - sizeof (struct nm_header_hdr)) { - mdb_warn("failed to read nm_header_hdr at %p", - wsp->walk_addr); - return (WALK_DONE); - } - - - - status = wsp->walk_callback(wsp->walk_addr, (&hdr)->hh_header, - wsp->walk_cbdata); - - *((int *)wsp->walk_data) += 1; - wsp->walk_addr = (uintptr_t)mdset[*((int *)wsp->walk_data)].s_nm; - return (status); -} - -void -namespace_walk_fini(mdb_walk_state_t *wsp) -{ - mdb_free(wsp->walk_data, sizeof (int)); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_sets.c b/usr/src/cmd/mdb/common/modules/md/walk_sets.c deleted file mode 100644 index 01a50527d2da..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_sets.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - -/* - * walk the sets - */ -/* ARGSUSED */ -int -sets_walk_init(mdb_walk_state_t *wsp) -{ - uintptr_t addr; - - snarf_sets(); - addr = (uintptr_t)mdset[0].s_db; - wsp->walk_data = mdb_alloc(sizeof (int), UM_SLEEP); - /* walk_data will hold the set number of the set being printed */ - *((int *)wsp->walk_data) = 0; - wsp->walk_addr = addr; - return (WALK_NEXT); -} - -int -sets_walk_step(mdb_walk_state_t *wsp) -{ - int status; - - if (*((int *)wsp->walk_data) >= md_nsets) - return (WALK_DONE); - - status = wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata); - - *((int *)wsp->walk_data) += 1; - wsp->walk_addr = (uintptr_t)mdset[*((int *)wsp->walk_data)].s_db; - - return (status); -} - -void -sets_walk_fini(mdb_walk_state_t *wsp) -{ - mdb_free(wsp->walk_data, sizeof (int)); -} diff --git a/usr/src/cmd/mdb/common/modules/md/walk_units.c b/usr/src/cmd/mdb/common/modules/md/walk_units.c deleted file mode 100644 index 18f524efdc99..000000000000 --- a/usr/src/cmd/mdb/common/modules/md/walk_units.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "mdinclude.h" - - -typedef struct unit_data { - int nunits; - int do_all; - int setno; -} unit_data_t; - -/* - * walk the units - */ -/* ARGSUSED */ -int -units_walk_init(mdb_walk_state_t *wsp) -{ - uintptr_t addr; - int i; - - snarf_sets(); - wsp->walk_data = mdb_alloc(sizeof (unit_data_t), UM_SLEEP); - /* - * walk_data contains the following information: - * nunits : the number of units of the set we've printed out. - * setno: set number we're printing out the information from. - * do_all: print all the sets on the system or not. - */ - ((unit_data_t *)wsp->walk_data)->nunits = 0; - if (wsp->walk_addr == NULL) { - /* if no address is specified, walk all units of all sets */ - mdb_printf("Units for set number 0\n"); - addr = (uintptr_t)mdset[0].s_un; - wsp->walk_addr = addr; - ((unit_data_t *)wsp->walk_data)->setno = 0; - ((unit_data_t *)wsp->walk_data)->do_all = 1; - } else { - /* walk the specified set */ - ((unit_data_t *)wsp->walk_data)->do_all = 0; - for (i = 0; i < md_nsets; i++) { - if (mdset[i].s_db == (void **)wsp->walk_addr) { - wsp->walk_addr = (uintptr_t)mdset[i].s_un; - ((unit_data_t *)wsp->walk_data)->setno = i; - return (WALK_NEXT); - } - } - } - return (WALK_NEXT); -} - -int -units_walk_step(mdb_walk_state_t *wsp) -{ - int status; - unit_data_t *un = (unit_data_t *)wsp->walk_data; - void **ptr; - - if (un->nunits >= md_nunits) { - un->setno += 1; - if ((un->setno < md_nsets) && (un->do_all == 1)) { - un->nunits = 0; - wsp->walk_addr = (uintptr_t)mdset[un->setno].s_un; - if (wsp->walk_addr != NULL) - mdb_printf("Units for set number %d\n", - un->setno); - } else { - return (WALK_DONE); - } - } - - if (wsp->walk_addr == NULL) { - un->nunits = md_nunits; - return (WALK_NEXT); - } - - status = wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata); - - if (status != WALK_DONE) { - ptr = (void **)wsp->walk_addr; - ptr++; - wsp->walk_addr = (uintptr_t)ptr; - un->nunits += 1; - } - return (status); -} - -void -units_walk_fini(mdb_walk_state_t *wsp) -{ - mdb_free(wsp->walk_data, sizeof (unit_data_t)); -} diff --git a/usr/src/cmd/mdb/intel/amd64/md/Makefile b/usr/src/cmd/mdb/intel/amd64/md/Makefile deleted file mode 100644 index 879bc72856a0..000000000000 --- a/usr/src/cmd/mdb/intel/amd64/md/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -MODULE = md.so -MDBTGT = kvm - -MODSRCS = dumphotspare.c \ - dumpmirror.c \ - dumpnamespace.c \ - findset.c \ - md.c \ - metaset.c \ - metastat.c \ - set_io_cnt.c \ - snarf.c \ - simple_de_ic.c \ - walk_hsp.c \ - walk_sets.c \ - walk_units.c \ - walk_didnm.c \ - walk_nm.c \ - walk_directory_block.c \ - walk_directory_entry.c - -include ../../../../Makefile.cmd -include ../../../../Makefile.cmd.64 -include ../../Makefile.amd64 -include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/intel/ia32/md/Makefile b/usr/src/cmd/mdb/intel/ia32/md/Makefile deleted file mode 100644 index 8531855e44d2..000000000000 --- a/usr/src/cmd/mdb/intel/ia32/md/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -MODULE = md.so -MDBTGT = kvm - -MODSRCS = dumphotspare.c \ - dumpmirror.c \ - dumpnamespace.c \ - findset.c \ - md.c \ - metaset.c \ - metastat.c \ - set_io_cnt.c \ - snarf.c \ - simple_de_ic.c \ - walk_hsp.c \ - walk_sets.c \ - walk_units.c \ - walk_didnm.c \ - walk_nm.c \ - walk_directory_block.c \ - walk_directory_entry.c - -include ../../../../Makefile.cmd -include ../../Makefile.ia32 -include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/sparc/v9/md/Makefile b/usr/src/cmd/mdb/sparc/v9/md/Makefile deleted file mode 100644 index d0ad7e39060b..000000000000 --- a/usr/src/cmd/mdb/sparc/v9/md/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -MODULE = md.so -MDBTGT = kvm - -MODSRCS = dumphotspare.c \ - dumpmirror.c \ - dumpnamespace.c \ - findset.c \ - md.c \ - metaset.c \ - metastat.c \ - set_io_cnt.c \ - snarf.c \ - simple_de_ic.c \ - walk_hsp.c \ - walk_sets.c \ - walk_units.c \ - walk_didnm.c \ - walk_nm.c \ - walk_directory_block.c \ - walk_directory_entry.c - - -include ../../../../Makefile.cmd -include ../../../../Makefile.cmd.64 -include ../../Makefile.sparcv9 -include ../../../Makefile.module diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com index 4b12d1cd279b..b060ee92a190 100644 --- a/usr/src/cmd/rcm_daemon/Makefile.com +++ b/usr/src/cmd/rcm_daemon/Makefile.com @@ -18,8 +18,10 @@ # # CDDL HEADER END # + # # Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2016 Nexenta Systems, Inc. # include ../../Makefile.cmd @@ -32,7 +34,7 @@ RCM_SRC = \ $(COMMON)/rcm_impl.c \ $(COMMON)/rcm_subr.c \ $(COMMON)/rcm_lock.c \ - $(COMMON)/rcm_script.c + $(COMMON)/rcm_script.c RCM_OBJ = \ rcm_event.o \ @@ -56,7 +58,6 @@ COMMON_MOD_SRC = \ $(COMMON)/pool_rcm.c \ $(COMMON)/mpxio_rcm.c \ $(COMMON)/ip_anon_rcm.c \ - $(COMMON)/svm_rcm.c \ $(COMMON)/bridge_rcm.c sparc_MOD_SRC = $(COMMON)/ttymux_rcm.c @@ -82,7 +83,6 @@ COMMON_MOD_OBJ = \ pool_rcm.o \ mpxio_rcm.o \ ip_anon_rcm.o \ - svm_rcm.o \ bridge_rcm.o sparc_MOD_OBJ = ttymux_rcm.o @@ -103,7 +103,6 @@ COMMON_RCM_MODS = \ SUNW_pool_rcm.so \ SUNW_mpxio_rcm.so \ SUNW_ip_anon_rcm.so \ - SUNW_svm_rcm.so \ SUNW_bridge_rcm.so sparc_RCM_MODS = SUNW_ttymux_rcm.so @@ -130,9 +129,8 @@ rcm_daemon := LDFLAGS += $(MAPFILES:%=-M%) LINTFLAGS += -u -erroff=E_FUNC_ARG_UNUSED -LDLIBS_MODULES = +LDLIBS_MODULES = SUNW_pool_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lpool -SUNW_svm_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lmeta SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm diff --git a/usr/src/cmd/rcm_daemon/common/svm_rcm.c b/usr/src/cmd/rcm_daemon/common/svm_rcm.c deleted file mode 100644 index 13a5933ca8ab..000000000000 --- a/usr/src/cmd/rcm_daemon/common/svm_rcm.c +++ /dev/null @@ -1,3124 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rcm_module.h" - -/* - * This module is the RCM Module for SVM. The policy adopted by this module - * is to block offline requests for any SVM resource that is in use. A - * resource is considered to be in use if it contains a metadb or if it is - * a non-errored component of a metadevice that is open. - * - * The module uses the library libmeta to access the current state of the - * metadevices. On entry, and when svm_register() is called, the module - * builds a cache of all of the SVM resources and their dependencies. Each - * metadevice has an entry of type deventry_t which is accessed by a hash - * function. When the cache is built each SVM resource is registered with - * the RCM framework. The check_device code path uses meta_invalidate_name to - * ensure that the caching in libmeta will not conflict with the cache - * we build within this code. - * - * When an RCM operation occurs that affects a registered SVM resource, the RCM - * framework will call the appropriate routine in this module. The cache - * entry will be found and if the resource has dependants, a callback will - * be made into the RCM framework to pass the request on to the dependants, - * which may themselves by SVM resources. - * - * Locking: - * The cache is protected by a mutex - */ - -/* - * Private constants - */ - -/* - * Generic Messages - */ -#define MSG_UNRECOGNIZED gettext("SVM: \"%s\" is not a SVM resource") -#define MSG_NODEPS gettext("SVM: can't find dependents") -#define MSG_NORECACHE gettext("SVM: WARNING: couldn't re-cache.") -#define MSG_OPENERR gettext("SVM: can't open \"%s\"") -#define MSG_CACHEFAIL gettext("SVM: can't malloc cache") - -#define ERR_UNRECOGNIZED gettext("unrecognized SVM resource") -#define ERR_NODEPS gettext("can't find SVM resource dependents") - -/* - * Macros to produce a quoted string containing the value of a preprocessor - * macro. For example, if SIZE is defined to be 256, VAL2STR(SIZE) is "256". - * This is used to construct format strings for scanf-family functions below. - */ -#define QUOTE(x) #x -#define VAL2STR(x) QUOTE(x) - -typedef enum { - SVM_SLICE = 0, - SVM_STRIPE, - SVM_CONCAT, - SVM_MIRROR, - SVM_RAID, - SVM_TRANS, - SVM_SOFTPART, - SVM_HS -} svm_type_t; - -/* Hash table parameters */ -#define HASH_DEFAULT 251 - -/* Hot spare pool users */ -typedef struct hspuser { - struct hspuser *next; /* next user */ - char *hspusername; /* name */ - dev_t hspuserkey; /* key */ -} hspuser_t; - -/* Hot spare pool entry */ -typedef struct hspentry { - struct hspentry *link; /* link through all hsp entries */ - struct hspentry *next; /* next hsp entry for a slice */ - char *hspname; /* name */ - hspuser_t *hspuser; /* first hsp user */ -} hspentry_t; - -/* Hash table entry */ -typedef struct deventry { - struct deventry *next; /* next entry with same hash */ - svm_type_t devtype; /* device type */ - dev_t devkey; /* key */ - char *devname; /* name in /dev */ - char *devicesname; /* name in /devices */ - struct deventry *dependent; /* 1st dependent */ - struct deventry *next_dep; /* next dependent */ - struct deventry *antecedent; /* antecedent */ - hspentry_t *hsp_list; /* list of hot spare pools */ - int flags; /* flags */ -} deventry_t; - -/* flag values */ -#define REMOVED 0x1 -#define IN_HSP 0x2 -#define TRANS_LOG 0x4 -#define CONT_SOFTPART 0x8 -#define CONT_METADB 0x10 - -/* - * Device redundancy flags. If the device can be removed from the - * metadevice configuration then it is considered a redundant device, - * otherwise not. - */ -#define NOTINDEVICE -1 -#define NOTREDUNDANT 0 -#define REDUNDANT 1 - -/* Cache */ -typedef struct cache { - deventry_t **hashline; /* hash table */ - int32_t size; /* sizer of hash table */ - uint32_t registered; /* cache regsitered */ -} cache_t; - -/* - * Forward declarations of private functions - */ - -static int svm_register(rcm_handle_t *hd); -static int svm_unregister(rcm_handle_t *hd); -static int svm_unregister_device(rcm_handle_t *hd, deventry_t *d); -static deventry_t *cache_dependent(cache_t *cache, char *devname, int devflags, - deventry_t *dependents); -static deventry_t *cache_device(cache_t *cache, char *devname, - svm_type_t devtype, md_dev64_t devkey, int devflags); -static hspentry_t *find_hsp(char *hspname); -static hspuser_t *add_hsp_user(char *hspname, deventry_t *deventry); -static hspentry_t *add_hsp(char *hspname, deventry_t *deventry); -static void free_names(mdnamelist_t *nlp); -static int cache_all_devices(cache_t *cache); -static int cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp); -static int cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans); -static int cache_mirror(cache_t *cache, mdnamelist_t *nlp, - md_mirror_t *mirror); -static int cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid); -static int cache_stripe(cache_t *cache, mdnamelist_t *nlp, - md_stripe_t *stripe); -static int cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part); -static int cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp); -static cache_t *create_cache(); -static deventry_t *create_deventry(char *devname, svm_type_t devtype, - md_dev64_t devkey, int devflags); -static void cache_remove(cache_t *cache, deventry_t *deventry); -static deventry_t *cache_lookup(cache_t *cache, char *devname); -static void cache_sync(rcm_handle_t *hd, cache_t **cachep); -static char *cache_walk(cache_t *cache, uint32_t *i, deventry_t **hashline); -static void free_cache(cache_t **cache); -static void free_deventry(deventry_t **deventry); -static uint32_t hash(uint32_t h, char *s); -static void svm_register_device(rcm_handle_t *hd, char *devname); -static int add_dep(int *ndeps, char ***depsp, deventry_t *deventry); -static int get_dependents(deventry_t *deventry, char *** dependentsp); -char *add_to_usage(char ** usagep, char *string); -char *add_to_usage_fmt(char **usagep, char *fmt, char *string); -static int is_open(dev_t devkey); -static int svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **errorp, rcm_info_t **infop); -static int svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **errorp, rcm_info_t **infop); -static int svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **usagep, char **errorp, nvlist_t *props, rcm_info_t **infop); -static int svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id, - timespec_t *interval, uint_t flags, char **errorp, - rcm_info_t **infop); -static int svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **errorp, rcm_info_t **infop); -static int svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **errorp, rcm_info_t **infop); -static int check_device(deventry_t *deventry); -static int check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep); - -/* - * Module-Private data - */ -static struct rcm_mod_ops svm_ops = -{ - RCM_MOD_OPS_VERSION, - svm_register, - svm_unregister, - svm_get_info, - svm_suspend, - svm_resume, - svm_offline, - svm_online, - svm_remove, - NULL, - NULL, - NULL -}; - -static cache_t *svm_cache = NULL; -static mutex_t svm_cache_lock; -static hspentry_t *hsp_head = NULL; - -/* - * Module Interface Routines - */ - -/* - * rcm_mod_init() - * - * Create a cache, and return the ops structure. - * Input: None - * Return: rcm_mod_ops structure - */ -struct rcm_mod_ops * -rcm_mod_init() -{ - /* initialize the lock mutex */ - if (mutex_init(&svm_cache_lock, USYNC_THREAD, NULL)) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't init mutex")); - return (NULL); - } - - /* need to initialize the cluster library to avoid seg faults */ - if (sdssc_bind_library() == SDSSC_ERROR) { - rcm_log_message(RCM_ERROR, - gettext("SVM: Interface error with libsds_sc.so," - " aborting.")); - return (NULL); - } - - /* Create a cache */ - if ((svm_cache = create_cache()) == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: module can't function, aborting.")); - return (NULL); - } - - /* Return the ops vectors */ - return (&svm_ops); -} - -/* - * rcm_mod_info() - * - * Return a string describing this module. - * Input: None - * Return: String - * Locking: None - */ -const char * -rcm_mod_info() -{ - return (gettext("Solaris Volume Manager module 1.9")); -} - -/* - * rcm_mod_fini() - * - * Destroy the cache and mutex - * Input: None - * Return: RCM_SUCCESS - * Locking: None - */ -int -rcm_mod_fini() -{ - (void) mutex_lock(&svm_cache_lock); - if (svm_cache) { - free_cache(&svm_cache); - } - (void) mutex_unlock(&svm_cache_lock); - (void) mutex_destroy(&svm_cache_lock); - return (RCM_SUCCESS); -} - -/* - * svm_register() - * - * Make sure the cache is properly sync'ed, and its registrations are in - * order. - * - * Input: - * rcm_handle_t *hd - * Return: - * RCM_SUCCESS - * Locking: the cache is locked throughout the execution of this routine - * because it reads and possibly modifies cache links continuously. - */ -static int -svm_register(rcm_handle_t *hd) -{ - uint32_t i = 0; - deventry_t *l = NULL; - char *devicename; - - - rcm_log_message(RCM_TRACE1, "SVM: register\n"); - /* Guard against bad arguments */ - assert(hd != NULL); - - /* Lock the cache */ - (void) mutex_lock(&svm_cache_lock); - - /* If the cache has already been registered, then just sync it. */ - if (svm_cache && svm_cache->registered) { - cache_sync(hd, &svm_cache); - (void) mutex_unlock(&svm_cache_lock); - return (RCM_SUCCESS); - } - - /* If not, register the whole cache and mark it as registered. */ - while ((devicename = cache_walk(svm_cache, &i, &l)) != NULL) { - svm_register_device(hd, devicename); - } - svm_cache->registered = 1; - - /* Unlock the cache */ - (void) mutex_unlock(&svm_cache_lock); - - return (RCM_SUCCESS); -} - -/* - * svm_unregister() - * - * Manually walk through the cache, unregistering all the special files and - * mount points. - * - * Input: - * rcm_handle_t *hd - * Return: - * RCM_SUCCESS - * Locking: the cache is locked throughout the execution of this routine - * because it reads and modifies cache links continuously. - */ -static int -svm_unregister(rcm_handle_t *hd) -{ - deventry_t *l = NULL; - uint32_t i = 0; - - rcm_log_message(RCM_TRACE1, "SVM: unregister\n"); - /* Guard against bad arguments */ - assert(hd != NULL); - - /* Walk the cache, unregistering everything */ - (void) mutex_lock(&svm_cache_lock); - if (svm_cache != NULL) { - while (cache_walk(svm_cache, &i, &l) != NULL) { - (void) svm_unregister_device(hd, l); - } - svm_cache->registered = 0; - } - (void) mutex_unlock(&svm_cache_lock); - return (RCM_SUCCESS); -} - -/* - * svm_offline() - * - * Determine dependents of the resource being offlined, and offline - * them all. - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * rcm_info_t **infop ptr to info string - * Output: - * char **errorp pass back error message - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked for most of this routine, except while - * processing dependents. - */ -/*ARGSUSED*/ -static int -svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, - char **errorp, rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - int ret; - char **dependents; - deventry_t *deventry; - hspentry_t *hspentry; - hspuser_t *hspuser; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - assert(errorp != NULL); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: offline(%s), flags(%d)\n", - rsrc, flags); - - /* Lock the cache */ - (void) mutex_lock(&svm_cache_lock); - - /* Lookup the resource in the cache. */ - if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { - rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED); - *errorp = strdup(ERR_UNRECOGNIZED); - (void) mutex_unlock(&svm_cache_lock); - rv = RCM_FAILURE; - rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n", - rsrc, rv); - return (rv); - } - /* If it is a TRANS device, do not allow the offline */ - if (deventry->devtype == SVM_TRANS) { - rv = RCM_FAILURE; - (void) mutex_unlock(&svm_cache_lock); - goto exit; - } - - if (deventry->flags&IN_HSP) { - /* - * If this is in a hot spare pool, check to see - * if any of the hot spare pool users are open - */ - hspentry = deventry->hsp_list; - while (hspentry) { - hspuser = hspentry->hspuser; - while (hspuser) { - /* Check if open */ - if (is_open(hspuser->hspuserkey)) { - rv = RCM_FAILURE; - (void) mutex_unlock(&svm_cache_lock); - goto exit; - } - hspuser = hspuser->next; - } - hspentry = hspentry->next; - } - } - - /* Fail if the device contains a metadb replica */ - if (deventry->flags&CONT_METADB) { - /* - * The user should delete the replica before continuing, - * so force the error. - */ - rcm_log_message(RCM_TRACE1, "SVM: %s has a replica\n", - deventry->devname); - rv = RCM_FAILURE; - (void) mutex_unlock(&svm_cache_lock); - goto exit; - } - - /* Get dependents */ - if (get_dependents(deventry, &dependents) != 0) { - rcm_log_message(RCM_ERROR, MSG_NODEPS); - rv = RCM_FAILURE; - (void) mutex_unlock(&svm_cache_lock); - goto exit; - } - - if (dependents) { - /* Check if the device is broken (needs maintanence). */ - if (check_device(deventry) == REDUNDANT) { - /* - * The device is broken, the offline request should - * succeed, so ignore any of the dependents. - */ - rcm_log_message(RCM_TRACE1, - "SVM: ignoring dependents\n"); - (void) mutex_unlock(&svm_cache_lock); - free(dependents); - goto exit; - } - (void) mutex_unlock(&svm_cache_lock); - ret = rcm_request_offline_list(hd, dependents, flags, infop); - if (ret != RCM_SUCCESS) { - rv = ret; - } - free(dependents); - } else { - /* If no dependents, check if the metadevice is open */ - if ((deventry->devkey) && (is_open(deventry->devkey))) { - rv = RCM_FAILURE; - (void) mutex_unlock(&svm_cache_lock); - goto exit; - } - (void) mutex_unlock(&svm_cache_lock); - } -exit: - rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n", rsrc, rv); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to offline")); - return (rv); -} - -/* - * svm_online() - * - * Just pass the online notification on to the dependents of this resource - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * rcm_info_t **infop ptr to info string - * Output: - * char **errorp pass back error message - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked for most of this routine, except while - * processing dependents. - */ -/*ARGSUSED*/ -static int -svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, - rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - char **dependents; - deventry_t *deventry; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: online(%s)\n", rsrc); - - /* Lookup this resource in the cache (cache gets locked) */ - (void) mutex_lock(&svm_cache_lock); - deventry = cache_lookup(svm_cache, rsrc); - if (deventry == NULL) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); - *errorp = strdup(ERR_UNRECOGNIZED); - return (RCM_FAILURE); - } - - /* Get dependents */ - if (get_dependents(deventry, &dependents) != 0) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_NODEPS); - *errorp = strdup(ERR_NODEPS); - return (RCM_FAILURE); - } - (void) mutex_unlock(&svm_cache_lock); - - if (dependents) { - rv = rcm_notify_online_list(hd, dependents, flags, infop); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to online")); - free(dependents); - } - - return (rv); -} - -/* - * svm_get_info() - * - * Gather usage information for this resource. - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * nvlist_t *props Not used - * rcm_info_t **infop ptr to info string - * Output: - * char **infop pass back info string - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked throughout the whole function - */ -/*ARGSUSED*/ -static int -svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **usagep, - char **errorp, nvlist_t *props, rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - deventry_t *deventry; - deventry_t *dependent; - hspentry_t *hspentry; - char **dependents; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - assert(usagep != NULL); - assert(errorp != NULL); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: get_info(%s)\n", rsrc); - - /* Lookup this resource in the cache (cache gets locked) */ - (void) mutex_lock(&svm_cache_lock); - deventry = cache_lookup(svm_cache, rsrc); - if (deventry == NULL) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); - *errorp = strdup(ERR_UNRECOGNIZED); - return (RCM_FAILURE); - } - - *usagep = NULL; /* Initialise usage string */ - if (deventry->flags&CONT_METADB) { - *usagep = add_to_usage(usagep, gettext("contains metadb(s)")); - } - if (deventry->flags&CONT_SOFTPART) { - *usagep = add_to_usage(usagep, - gettext("contains soft partition(s)")); - } - if (deventry->devtype == SVM_SOFTPART) { - *usagep = add_to_usage_fmt(usagep, - gettext("soft partition based on \"%s\""), - deventry->antecedent->devname); - } - - if (deventry->flags&IN_HSP) { - int hspflag = 0; - hspentry = deventry->hsp_list; - while (hspentry) { - if (hspflag == 0) { - *usagep = add_to_usage(usagep, - gettext("member of hot spare pool")); - hspflag = 1; - } - *usagep = add_to_usage_fmt(usagep, "\"%s\"", - hspentry->hspname); - hspentry = hspentry->next; - } - } else { - dependent = deventry->dependent; - while (dependent) { - /* Resource has dependents */ - switch (dependent->devtype) { - case SVM_STRIPE: - *usagep = add_to_usage_fmt(usagep, - gettext("component of stripe \"%s\""), - dependent->devname); - break; - case SVM_CONCAT: - *usagep = add_to_usage_fmt(usagep, - gettext("component of concat \"%s\""), - dependent->devname); - break; - case SVM_MIRROR: - *usagep = add_to_usage_fmt(usagep, - gettext("submirror of \"%s\""), - dependent->devname); - break; - case SVM_RAID: - *usagep = add_to_usage_fmt(usagep, - gettext("component of RAID \"%s\""), - dependent->devname); - break; - case SVM_TRANS: - if (deventry->flags&TRANS_LOG) { - *usagep = add_to_usage_fmt(usagep, - gettext("trans log for \"%s\""), - dependent->devname); - } else { - *usagep = add_to_usage_fmt(usagep, - gettext("trans master for \"%s\""), - dependent->devname); - } - break; - case SVM_SOFTPART: - /* Contains soft parts, already processed */ - break; - default: - rcm_log_message(RCM_ERROR, - gettext("Unknown type %d\n"), - dependent->devtype); - } - dependent = dependent->next_dep; - } - } - - /* Get dependents and recurse if necessary */ - if (get_dependents(deventry, &dependents) != 0) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_NODEPS); - *errorp = strdup(ERR_NODEPS); - return (RCM_FAILURE); - } - (void) mutex_unlock(&svm_cache_lock); - - if ((flags & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) { - rv = rcm_get_info_list(hd, dependents, flags, infop); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to get info")); - } - free(dependents); - - if (*usagep != NULL) - rcm_log_message(RCM_TRACE1, "SVM: usage = %s\n", *usagep); - return (rv); -} - -/* - * svm_suspend() - * - * Notify all dependents that the resource is being suspended. - * Since no real operation is involved, QUERY or not doesn't matter. - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * rcm_info_t **infop ptr to info string - * Output: - * char **errorp pass back error message - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked for most of this routine, except while - * processing dependents. - */ -static int -svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, - uint_t flags, char **errorp, rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - deventry_t *deventry; - char **dependents; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - assert(interval != NULL); - assert(errorp != NULL); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: suspend(%s)\n", rsrc); - - /* Lock the cache and extract information about this resource. */ - (void) mutex_lock(&svm_cache_lock); - if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); - *errorp = strdup(ERR_UNRECOGNIZED); - return (RCM_SUCCESS); - } - - /* Get dependents */ - if (get_dependents(deventry, &dependents) != 0) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_NODEPS); - *errorp = strdup(ERR_NODEPS); - return (RCM_FAILURE); - } - (void) mutex_unlock(&svm_cache_lock); - - if (dependents) { - rv = rcm_request_suspend_list(hd, dependents, flags, - interval, infop); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to suspend")); - free(dependents); - } - - return (rv); -} - -/* - * svm_resume() - * - * Notify all dependents that the resource is being resumed. - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * rcm_info_t **infop ptr to info string - * Output: - * char **errorp pass back error message - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked for most of this routine, except while - * processing dependents. - * - */ -static int -svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, - rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - deventry_t *deventry; - char **dependents; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - assert(errorp != NULL); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: resume(%s)\n", rsrc); - - /* - * Lock the cache just long enough to extract information about this - * resource. - */ - (void) mutex_lock(&svm_cache_lock); - if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); - *errorp = strdup(ERR_UNRECOGNIZED); - return (RCM_SUCCESS); - } - - /* Get dependents */ - - if (get_dependents(deventry, &dependents) != 0) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_NODEPS); - *errorp = strdup(ERR_NODEPS); - return (RCM_FAILURE); - } - - (void) mutex_unlock(&svm_cache_lock); - if (dependents) { - rv = rcm_notify_resume_list(hd, dependents, flags, infop); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to resume")); - free(dependents); - } - - return (rv); -} - - -/* - * svm_remove() - * - * Remove the resource from the cache and notify all dependents that - * the resource has been removed. - * - * Input: - * rcm_handle_t *hd handle - * char* *rsrc resource name - * id_t id 0 - * char **errorp ptr to error message - * rcm_info_t **infop ptr to info string - * Output: - * char **errorp pass back error message - * Return: - * int RCM_SUCCESS or RCM_FAILURE - * Locking: the cache is locked for most of this routine, except while - * processing dependents. - */ -static int -svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, - rcm_info_t **infop) -{ - int rv = RCM_SUCCESS; - char **dependents; - deventry_t *deventry; - - /* Guard against bad arguments */ - assert(hd != NULL); - assert(rsrc != NULL); - assert(id == (id_t)0); - - /* Trace */ - rcm_log_message(RCM_TRACE1, "SVM: svm_remove(%s)\n", rsrc); - - /* Lock the cache while removing resource */ - (void) mutex_lock(&svm_cache_lock); - if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { - (void) mutex_unlock(&svm_cache_lock); - return (RCM_SUCCESS); - } - - /* Get dependents */ - if (get_dependents(deventry, &dependents) != 0) { - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_ERROR, MSG_NODEPS); - deventry->flags |= REMOVED; - *errorp = strdup(ERR_NODEPS); - return (RCM_FAILURE); - } - - if (dependents) { - (void) mutex_unlock(&svm_cache_lock); - rv = rcm_notify_remove_list(hd, dependents, flags, infop); - (void) mutex_lock(&svm_cache_lock); - if (rv != RCM_SUCCESS) - *errorp = strdup(gettext("unable to remove")); - free(dependents); - } - - /* Mark entry as removed */ - deventry->flags |= REMOVED; - - (void) mutex_unlock(&svm_cache_lock); - rcm_log_message(RCM_TRACE1, "SVM: exit svm_remove(%s)\n", rsrc); - /* Clean up and return success */ - return (RCM_SUCCESS); -} - -/* - * Definitions of private functions - * - */ - -/* - * find_hsp() - * - * Find the hot spare entry from the linked list of all hotspare pools - * - * Input: - * char *hspname name of hot spare pool - * Return: - * hspentry_t hot spare entry - */ -static hspentry_t * -find_hsp(char *hspname) -{ - hspentry_t *hspentry = hsp_head; - - while (hspentry) { - if (strcmp(hspname, hspentry->hspname) == 0) - return (hspentry); - hspentry = hspentry->link; - } - return (NULL); -} - -/* - * add_hsp_user() - * - * Add a hot spare pool user to the list for the hsp specfied by - * hspname. The memory allocated here will be freed by free_cache() - * - * Input: - * char *hspname hot spare pool name - * deventry_t *deventry specified hsp user - * Return: - * hspuser_t entry in hsp user list - */ -static hspuser_t * -add_hsp_user(char *hspname, deventry_t *deventry) -{ - hspuser_t *newhspuser; - char *newhspusername; - hspuser_t *previous; - hspentry_t *hspentry; - - hspentry = find_hsp(hspname); - if (hspentry == NULL) - return (NULL); - rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp_user %s, %x, %x\n", - hspname, hspentry, hspentry->hspuser); - - newhspuser = (hspuser_t *)malloc(sizeof (*newhspuser)); - if (newhspuser == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc hspuser")); - return (NULL); - } - (void) memset((char *)newhspuser, 0, sizeof (*newhspuser)); - - newhspusername = strdup(deventry->devname); - if (newhspusername == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc hspusername")); - free(newhspuser); - return (NULL); - } - newhspuser->hspusername = newhspusername; - newhspuser->hspuserkey = deventry->devkey; - - if ((previous = hspentry->hspuser) == NULL) { - hspentry->hspuser = newhspuser; - } else { - hspuser_t *temp = previous->next; - previous->next = newhspuser; - newhspuser->next = temp; - } - rcm_log_message(RCM_TRACE1, "SVM: Added hsp_user %s (dev %x) to %s\n", - newhspusername, newhspuser->hspuserkey, hspname); - return (newhspuser); -} - -/* - * add_hsp() - * - * Add a hot spare pool entry to the list for the slice, deventry. - * Also add to the linked list of all hsp pools - * The memory alllocated here will be freed by free_cache() - * - * Input: - * char *hspname name of hsp pool entry - * deventry_t *deventry device entry for the slice - * Return: - * hspentry_t end of hsp list - * Locking: None - */ -static hspentry_t * -add_hsp(char *hspname, deventry_t *deventry) -{ - hspentry_t *newhspentry; - hspentry_t *previous; - char *newhspname; - - rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp %s\n", - hspname); - newhspentry = (hspentry_t *)malloc(sizeof (*newhspentry)); - if (newhspentry == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc hspentry")); - return (NULL); - } - (void) memset((char *)newhspentry, 0, sizeof (*newhspentry)); - - newhspname = strdup(hspname); - if (newhspname == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc hspname")); - free(newhspentry); - return (NULL); - } - newhspentry->hspname = newhspname; - - /* Add to linked list of all hotspare pools */ - newhspentry->link = hsp_head; - hsp_head = newhspentry; - - /* Add to list of hotspare pools containing this slice */ - if ((previous = deventry->hsp_list) == NULL) { - deventry->hsp_list = newhspentry; - } else { - hspentry_t *temp = previous->next; - previous->next = newhspentry; - newhspentry->next = temp; - } - rcm_log_message(RCM_TRACE1, "SVM: Exit add_hsp %s\n", - hspname); - return (newhspentry); -} - -/* - * cache_dependent() - * - * Add a dependent for a deventry to the cache and return the cache entry - * If the name is not in the cache, we assume that it a SLICE. If it - * turns out to be any other type of metadevice, when it is processed - * in cache_all_devices_in_set(), cache_device() will be called to - * set the type to the actual value. - * - * Input: - * cache_t *cache cache - * char *devname metadevice name - * int devflags metadevice flags - * deventry_t *dependent dependent of this metadevice - * Return: - * deventry_t metadevice entry added to cache - * Locking: None - */ -static deventry_t * -cache_dependent(cache_t *cache, char *devname, int devflags, - deventry_t *dependent) -{ - - deventry_t *newdeventry = NULL; - deventry_t *hashprev = NULL; - deventry_t *deventry = NULL; - deventry_t *previous = NULL; - uint32_t hash_index; - int comp; - - rcm_log_message(RCM_TRACE1, "SVM: Enter cache_dep %s, %x, %s\n", - devname, devflags, dependent->devname); - - hash_index = hash(cache->size, devname); - if (hash_index >= cache->size) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't hash device.")); - return (NULL); - } - - deventry = cache->hashline[hash_index]; - - /* if the hash table slot is empty, then this is easy */ - if (deventry == NULL) { - deventry = create_deventry(devname, SVM_SLICE, 0, devflags); - cache->hashline[hash_index] = deventry; - } else { - /* if the hash table slot isn't empty, find the immediate successor */ - hashprev = NULL; - while ((comp = strcmp(deventry->devname, devname)) < 0 && - deventry->next != NULL) { - hashprev = deventry; - deventry = deventry->next; - } - - if (comp == 0) { - /* if already in cache, just update the flags */ - deventry->flags |= devflags; - } else { - /* insert the entry if it's not already there */ - if ((newdeventry = create_deventry(devname, - SVM_SLICE, 0, devflags)) == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't create hash line.")); - return (NULL); - } - if (comp > 0) { - newdeventry->next = deventry; - if (hashprev) - hashprev->next = newdeventry; - else - cache->hashline[hash_index] = - newdeventry; - } else if (comp < 0) { - newdeventry->next = deventry->next; - deventry->next = newdeventry; - } - deventry = newdeventry; - } - } - /* complete deventry by linking the dependent to it */ - dependent->antecedent = deventry; - if ((previous = deventry->dependent) != NULL) { - deventry_t *temp = previous->next_dep; - previous->next_dep = dependent; - dependent->next_dep = temp; - } else deventry->dependent = dependent; - return (deventry); - -} - -/* - * cache_device() - * - * Add an entry to the cache for devname - * - * Input: - * cache_t *cache cache - * char *devname metadevice named - * svm_type_t devtype metadevice type - * md_dev64_t devkey dev_t of device - * int devflags device flags - * Return: - * deventry_t metadevice added to cache - * Locking: None - */ -static deventry_t * -cache_device(cache_t *cache, char *devname, svm_type_t devtype, - md_dev64_t devkey, int devflags) -{ - deventry_t *newdeventry = NULL; - deventry_t *previous = NULL; - deventry_t *deventry = NULL; - uint32_t hash_index; - int comp; - - rcm_log_message(RCM_TRACE1, "SVM: Enter cache_device %s, %x, %lx, %x\n", - devname, devtype, devkey, devflags); - - hash_index = hash(cache->size, devname); - if (hash_index >= cache->size) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't hash device.")); - return (NULL); - } - - deventry = cache->hashline[hash_index]; - - /* if the hash table slot is empty, then this is easy */ - if (deventry == NULL) { - deventry = create_deventry(devname, devtype, devkey, - devflags); - cache->hashline[hash_index] = deventry; - } else { - /* if the hash table slot isn't empty, find the immediate successor */ - previous = NULL; - while ((comp = strcmp(deventry->devname, devname)) < 0 && - deventry->next != NULL) { - previous = deventry; - deventry = deventry->next; - } - - if (comp == 0) { - /* - * If entry already exists, just set the type, key - * and flags - */ - deventry->devtype = devtype; - deventry->devkey = meta_cmpldev(devkey); - deventry->flags |= devflags; - } else { - /* insert the entry if it's not already there */ - if ((newdeventry = create_deventry(devname, devtype, - devkey, devflags)) == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't create hash line.")); - } - if (comp > 0) { - newdeventry->next = deventry; - if (previous) - previous->next = newdeventry; - else - cache->hashline[hash_index] = - newdeventry; - } else if (comp < 0) { - newdeventry->next = deventry->next; - deventry->next = newdeventry; - } - deventry = newdeventry; - } - } - return (deventry); -} -/* - * free_names() - * - * Free all name list entries - * - * Input: - * mdnamelist_t *np namelist pointer - * Return: None - */ - -static void -free_names(mdnamelist_t *nlp) -{ - mdnamelist_t *p; - - for (p = nlp; p != NULL; p = p->next) { - meta_invalidate_name(p->namep); - p->namep = NULL; - } - metafreenamelist(nlp); -} - -/* - * cache_hsp() - * - * Add an entry to the cache for each slice in the hot spare - * pool. Call add_hsp() to add the hot spare pool to the list - * of all hot spare pools. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to hsp name - * md_hsp_t *hsp - * Return: - * 0 if successful or error code - */ -static int -cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp) -{ - int i; - deventry_t *deventry; - md_hs_t *hs; - - for (i = 0; i < hsp->hotspares.hotspares_len; i++) { - hs = &hsp->hotspares.hotspares_val[i]; - if ((deventry = cache_device(cache, hs->hsnamep->bname, - SVM_SLICE, hs->hsnamep->dev, - IN_HSP)) == NULL) { - return (ENOMEM); - } - if (add_hsp(nlp->hspnamep->hspname, deventry) == NULL) { - return (ENOMEM); - } - } - return (0); -} - -/* - * cache_trans() - * - * Add an entry to the cache for trans metadevice, the master - * and the log. Call cache_dependent() to link that master and - * the log to the trans metadevice. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to trans name - * md_trans_t *trans - * Return: - * 0 if successful or error code - * - */ -static int -cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans) -{ - deventry_t *antecedent; - - if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_TRANS, - nlp->namep->dev, 0)) == NULL) { - return (ENOMEM); - } - - if (cache_device(cache, trans->masternamep->bname, SVM_SLICE, - trans->masternamep->dev, 0) == NULL) { - return (ENOMEM); - } - - if (cache_dependent(cache, trans->masternamep->bname, 0, - antecedent) == NULL) { - return (ENOMEM); - } - - if (trans->lognamep != NULL) { - if (cache_device(cache, trans->lognamep->bname, SVM_SLICE, - trans->lognamep->dev, TRANS_LOG) == NULL) { - return (ENOMEM); - } - - if (cache_dependent(cache, trans->lognamep->bname, 0, - antecedent) == NULL) { - return (ENOMEM); - } - } - return (0); -} - -/* - * cache_mirror() - * - * Add an entry to the cache for the mirror. For each - * submirror, call cache_dependent() to add an entry to the - * cache and to link it to mirror entry. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to mirror name - * md_mirror_t *mirror - * Return: - * 0 if successful or error code - * - */ -static int -cache_mirror(cache_t *cache, mdnamelist_t *nlp, md_mirror_t *mirror) -{ - int i; - deventry_t *antecedent; - - if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_MIRROR, - nlp->namep->dev, 0)) == NULL) { - return (ENOMEM); - } - for (i = 0; i < NMIRROR; i++) { - md_submirror_t *submirror; - - submirror = &mirror->submirrors[i]; - if (submirror->state == SMS_UNUSED) - continue; - - if (!submirror->submirnamep) - continue; - - if (cache_dependent(cache, submirror->submirnamep->bname, - 0, antecedent) == NULL) { - return (ENOMEM); - } - } - return (0); -} - -/* - * cache_raid() - * - * Add an entry to the cache for the RAID metadevice. For - * each component of the RAID call cache_dependent() to add - * add it to the cache and to link it to the RAID metadevice. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to raid name - * md_raid_t *raid mirror - * Return: - * 0 if successful or error code - */ -static int -cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid) -{ - int i; - deventry_t *antecedent; - - if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_RAID, - nlp->namep->dev, 0)) == NULL) { - return (ENOMEM); - } - if (raid->hspnamep) { - if (add_hsp_user(raid->hspnamep->hspname, - antecedent) == NULL) { - return (ENOMEM); - } - } - for (i = 0; i < raid->cols.cols_len; i++) { - if (cache_dependent(cache, - raid->cols.cols_val[i].colnamep->bname, 0, - antecedent) == NULL) { - return (ENOMEM); - } - } - return (0); -} - -/* - * cache_stripe() - * - * Add a CONCAT or a STRIPE entry entry to the cache for the - * metadevice and call cache_dependent() to add each - * component to the cache. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to stripe name - * md_stripe_t *stripe - * Return: - * 0 if successful or error code - * - */ -static int -cache_stripe(cache_t *cache, mdnamelist_t *nlp, md_stripe_t *stripe) -{ - int i; - deventry_t *antecedent; - - if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_CONCAT, - nlp->namep->dev, 0)) == NULL) { - return (ENOMEM); - } - - if (stripe->hspnamep) { - if (add_hsp_user(stripe->hspnamep->hspname, - antecedent) == NULL) { - return (ENOMEM); - } - } - for (i = 0; i < stripe->rows.rows_len; i++) { - md_row_t *rowp; - int j; - - rowp = &stripe->rows.rows_val[i]; - if (stripe->rows.rows_len == 1 && rowp->comps.comps_len > 1) { - if ((void*) cache_device(cache, nlp->namep->bname, - SVM_STRIPE, nlp->namep->dev, 0) == NULL) - return (ENOMEM); - } - for (j = 0; j < rowp->comps.comps_len; j++) { - md_comp_t *component; - - component = &rowp->comps.comps_val[j]; - if (cache_dependent(cache, - component->compnamep->bname, 0, - antecedent) == NULL) { - return (ENOMEM); - } - } - } - return (0); -} - -/* - * cache_sp() - * - * Add an entry to the cache for the softpart and also call - * cache_dependent() to set the CONT_SOFTPART flag in the - * cache entry for the metadevice that contains the softpart. - * - * Input: - * cache_t *cache cache - * mdnamelist_t *nlp pointer to soft part name - * md_sp_t *soft_part - * Return: - * 0 if successful or error code - * - */ -static int -cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part) -{ - deventry_t *antecedent; - - if ((antecedent = cache_device(cache, nlp->namep->bname, - SVM_SOFTPART, nlp->namep->dev, 0)) == NULL) { - return (ENOMEM); - } - if (cache_dependent(cache, soft_part->compnamep->bname, - CONT_SOFTPART, antecedent) == NULL) { - return (ENOMEM); - } - return (0); -} - -/* - * cache_all_devices_in_set() - * - * Add all of the metadevices and mddb replicas in the set to the - * cache - * - * Input: - * cache_t *cache cache - * mdsetname_t *sp setname - * Return: - * 0 if successful or error code - */ - -static int -cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp) -{ - md_error_t error = mdnullerror; - md_replicalist_t *replica_list = NULL; - md_replicalist_t *mdbp; - mdnamelist_t *nlp; - mdnamelist_t *trans_list = NULL; - mdnamelist_t *mirror_list = NULL; - mdnamelist_t *raid_list = NULL; - mdnamelist_t *stripe_list = NULL; - mdnamelist_t *sp_list = NULL; - mdhspnamelist_t *hsp_list = NULL; - - rcm_log_message(RCM_TRACE1, "SVM: cache_all_devices_in_set\n"); - - /* Add each mddb replica to the cache */ - if (metareplicalist(sp, MD_BASICNAME_OK, &replica_list, &error) < 0) { - /* there are no metadb's; that is ok, no need to check the rest */ - mdclrerror(&error); - return (0); - } - - for (mdbp = replica_list; mdbp != NULL; mdbp = mdbp->rl_next) { - if (cache_device(cache, mdbp->rl_repp->r_namep->bname, - SVM_SLICE, mdbp->rl_repp->r_namep->dev, - CONT_METADB) == NULL) { - metafreereplicalist(replica_list); - return (ENOMEM); - } - } - metafreereplicalist(replica_list); - - /* Process Hot Spare pools */ - if (meta_get_hsp_names(sp, &hsp_list, 0, &error) >= 0) { - mdhspnamelist_t *nlp; - - for (nlp = hsp_list; nlp != NULL; nlp = nlp->next) { - md_hsp_t *hsp; - - hsp = meta_get_hsp(sp, nlp->hspnamep, &error); - if (hsp != NULL) { - if (cache_hsp(cache, nlp, hsp) != 0) { - metafreehspnamelist(hsp_list); - return (ENOMEM); - } - } - meta_invalidate_hsp(nlp->hspnamep); - } - metafreehspnamelist(hsp_list); - } - - /* Process Trans devices */ - if (meta_get_trans_names(sp, &trans_list, 0, &error) >= 0) { - for (nlp = trans_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_trans_t *trans; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, - &error); - if (mdn == NULL) { - continue; - } - - trans = meta_get_trans(sp, mdn, &error); - - if (trans != NULL && trans->masternamep != NULL) { - if (cache_trans(cache, nlp, trans) != NULL) { - free_names(trans_list); - return (ENOMEM); - } - } - } - free_names(trans_list); - } - - /* Process Mirrors */ - if (meta_get_mirror_names(sp, &mirror_list, 0, &error) >= 0) { - for (nlp = mirror_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_mirror_t *mirror; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, - &error); - if (mdn == NULL) { - continue; - } - - mirror = meta_get_mirror(sp, mdn, &error); - - if (mirror != NULL) { - if (cache_mirror(cache, nlp, mirror) != 0) { - free_names(mirror_list); - return (ENOMEM); - } - } - } - free_names(mirror_list); - } - - /* Process Raid devices */ - if (meta_get_raid_names(sp, &raid_list, 0, &error) >= 0) { - for (nlp = raid_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_raid_t *raid; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, - &error); - if (mdn == NULL) { - continue; - } - - raid = meta_get_raid(sp, mdn, &error); - - if (raid != NULL) { - if (cache_raid(cache, nlp, raid) != 0) { - free_names(raid_list); - return (ENOMEM); - } - } - } - free_names(raid_list); - } - - /* Process Slices */ - if (meta_get_stripe_names(sp, &stripe_list, 0, &error) >= 0) { - for (nlp = stripe_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_stripe_t *stripe; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, - &error); - if (mdn == NULL) { - continue; - } - - stripe = meta_get_stripe(sp, mdn, &error); - - if (stripe != NULL) { - if (cache_stripe(cache, nlp, stripe) != 0) { - free_names(stripe_list); - return (ENOMEM); - } - } - } - free_names(stripe_list); - } - - /* Process Soft partitions */ - if (meta_get_sp_names(sp, &sp_list, 0, &error) >= 0) { - for (nlp = sp_list; nlp != NULL; nlp = nlp->next) { - mdname_t *mdn; - md_sp_t *soft_part; - - mdn = metaname(&sp, nlp->namep->cname, META_DEVICE, - &error); - if (mdn == NULL) { - continue; - } - - soft_part = meta_get_sp(sp, mdn, &error); - - if (soft_part != NULL) { - if (cache_sp(cache, nlp, soft_part) != 0) { - free_names(sp_list); - return (ENOMEM); - } - } - } - free_names(sp_list); - } - mdclrerror(&error); - return (0); -} - -/* - * create_all_devices() - * - * Cache all devices in all sets - * - * Input: - * cache_t cache - * Return: - * 0 if successful, error code if not - * Locking: None - */ -static int -cache_all_devices(cache_t *cache) -{ - int max_sets; - md_error_t error = mdnullerror; - int i; - - if ((max_sets = get_max_sets(&error)) == 0) { - return (0); - } - if (!mdisok(&error)) { - mdclrerror(&error); - return (0); - } - - rcm_log_message(RCM_TRACE1, - "SVM: cache_all_devices,max sets = %d\n", max_sets); - /* for each possible set number, see if we really have a diskset */ - for (i = 0; i < max_sets; i++) { - mdsetname_t *sp; - - if ((sp = metasetnosetname(i, &error)) == NULL) { - rcm_log_message(RCM_TRACE1, - "SVM: cache_all_devices no set: setno %d\n", i); - if (!mdisok(&error) && - ((error.info.errclass == MDEC_RPC) || - (mdiserror(&error, MDE_SMF_NO_SERVICE)))) { - /* - * metad rpc program not available - * - no metasets. metad rpc not available - * is indicated either by an RPC error or - * the fact that the service is not - * enabled. - */ - break; - } - - continue; - } - - if (cache_all_devices_in_set(cache, sp)) { - metaflushsetname(sp); - return (ENOMEM); - } - metaflushsetname(sp); - } - mdclrerror(&error); - rcm_log_message(RCM_TRACE1, "SVM: exit cache_all_devices\n"); - return (0); -} - -/* - * create_cache() - * - * Create an empty cache - * If the function fails free_cache() will be called to free any - * allocated memory. - * - * Input: None - * Return: - * cache_t cache created - * Locking: None - */ -static cache_t * -create_cache() -{ - cache_t *cache; - uint32_t size; - int ret; - - size = HASH_DEFAULT; - /* try allocating storage for a new, empty cache */ - if ((cache = (cache_t *)malloc(sizeof (cache_t))) == NULL) { - rcm_log_message(RCM_ERROR, MSG_CACHEFAIL); - return (NULL); - } - - (void) memset((char *)cache, 0, sizeof (*cache)); - cache->hashline = (deventry_t **)calloc(size, sizeof (deventry_t *)); - if (cache->hashline == NULL) { - rcm_log_message(RCM_ERROR, MSG_CACHEFAIL); - free(cache); - return (NULL); - } - cache->size = size; - - /* Initialise linked list of hsp entries */ - hsp_head = NULL; - - /* add entries to cache */ - ret = cache_all_devices(cache); - if (ret != 0) { - free_cache(&cache); - return (NULL); - } - - /* Mark the cache as new */ - cache->registered = 0; - - /* Finished - return the new cache */ - return (cache); -} - -/* - * create_deventry() - * - * Create a new deventry entry for device with name devname - * The memory alllocated here will be freed by free_cache() - * - * Input: - * char *devname device name - * svm_type_t devtype metadevice type - * md_dev64_t devkey device key - * int devflags device flags - * Return: - * deventry_t New deventry - * Locking: None - */ -static deventry_t * -create_deventry(char *devname, svm_type_t devtype, md_dev64_t devkey, - int devflags) -{ - const char *devprefix = "/dev/"; - deventry_t *newdeventry = NULL; - char *newdevname = NULL; - char *devicesname = NULL; - - newdeventry = (deventry_t *)malloc(sizeof (*newdeventry)); - if (newdeventry == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc deventrys")); - goto errout; - } - (void) memset((char *)newdeventry, 0, sizeof (*newdeventry)); - - newdevname = strdup(devname); - if (newdevname == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc devname")); - goto errout; - } - - /* - * When we register interest in a name starting with /dev/, RCM - * will use realpath to convert the name to a /devices name before - * storing it. metaclear removes both the /dev and the /devices - * form of the name of a metadevice from the file system. Thus, - * when we later call rcm_unregister_interest to get rid of a - * metacleared device, RCM will not be able to derive the /devices - * name for the /dev name. Thus, to unregister we will need to use - * the /devices name. We will save it now, so that we have it when - * it comes time to unregister. - */ - if (strncmp(devname, devprefix, strlen(devprefix)) == 0) { - devicesname = (char *)malloc(PATH_MAX); - if (devicesname == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't malloc PATH_MAX bytes")); - goto errout; - } - if (realpath(devname, devicesname) == NULL) { - free(devicesname); - devicesname = NULL; - } - } - newdeventry->devname = newdevname; - newdeventry->devicesname = devicesname; - newdeventry->devtype = devtype; - newdeventry->devkey = meta_cmpldev(devkey); - newdeventry->flags = devflags; - if (newdeventry->devicesname == NULL) { - rcm_log_message(RCM_TRACE1, - "SVM created deventry for %s\n", newdeventry->devname); - } else { - rcm_log_message(RCM_TRACE1, - "SVM created deventry for %s (%s)\n", - newdeventry->devname, newdeventry->devicesname); - } - return (newdeventry); - -errout: - if (devicesname != NULL) - free(devicesname); - if (newdevname != NULL) - free(newdevname); - if (newdeventry != NULL) - free(newdeventry); - return (NULL); -} - -/* - * cache_remove() - * - * Given a cache and a deventry, the deventry is - * removed from the cache's tables and memory for the deventry is - * free'ed. - * - * Input: - * cache_t *cache cache - * deventry_t *deventry deventry to be removed - * Return: None - * Locking: The cache must be locked by the caller prior to calling - * this routine. - */ -static void -cache_remove(cache_t *cache, deventry_t *deventry) -{ - deventry_t *olddeventry; - deventry_t *previous; - hspentry_t *hspentry; - hspentry_t *oldhspentry; - hspuser_t *hspuser; - hspuser_t *oldhspuser; - uint32_t hash_index; - - /* sanity check */ - if (cache == NULL || deventry == NULL || deventry->devname == NULL) - return; - - - /* If this is in the hash table, remove it from there */ - hash_index = hash(cache->size, deventry->devname); - if (hash_index >= cache->size) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't hash device.")); - return; - } - olddeventry = cache->hashline[hash_index]; - previous = NULL; - while (olddeventry) { - if (olddeventry->devname && - strcmp(olddeventry->devname, deventry->devname) == 0) { - break; - } - previous = olddeventry; - olddeventry = olddeventry->next; - } - if (olddeventry) { - if (previous) - previous->next = olddeventry->next; - else - cache->hashline[hash_index] = olddeventry->next; - - if (olddeventry->flags&IN_HSP) { - /* - * If this is in a hot spare pool, remove the list - * of hot spare pools that it is in along with - * all of the volumes that are users of the pool - */ - hspentry = olddeventry->hsp_list; - while (hspentry) { - oldhspentry = hspentry; - hspuser = hspentry->hspuser; - while (hspuser) { - oldhspuser = hspuser; - free(hspuser->hspusername); - hspuser = hspuser->next; - free(oldhspuser); - } - free(hspentry->hspname); - hspentry = hspentry->next; - free(oldhspentry); - } - } - free(olddeventry->devname); - free(olddeventry); - } - -} - -/* - * cache_lookup() - * - * Return the deventry corresponding to devname from the cache - * Input: - * cache_t cache cache - * char *devname name to lookup in cache - * Return: - * deventry_t deventry of name, NULL if not found - * Locking: cache lock held on entry and on exit - */ -static deventry_t * -cache_lookup(cache_t *cache, char *devname) -{ - int comp; - uint32_t hash_index; - deventry_t *deventry; - - hash_index = hash(cache->size, devname); - if (hash_index >= cache->size) { - rcm_log_message(RCM_ERROR, - gettext("SVM: can't hash resource.")); - return (NULL); - } - - deventry = cache->hashline[hash_index]; - while (deventry) { - comp = strcmp(deventry->devname, devname); - if (comp == 0) - return (deventry); - if (comp > 0) - return (NULL); - deventry = deventry->next; - } - return (NULL); -} - -/* - * cache_sync() - * - * Resync cache with the svm database. First a new cache is created - * that represents the current state of the SVM database. The - * function walks the new cache to look for new entries that must be - * registered. The new entries are kept in a list, because we cannot - * register them at this point. Entries that appear in both caches - * are removed from the old cache. Because of this at the end of the - * walk, the old cache will only contain devices that have been - * removed and need to be unregistered. - * - * Next the old cache is walked, so that we can unregister the devices - * that are no longer present. - * - * Finally, we process the list of new devices that must be - * registered. There is a reason why we must unregister the removed - * (metacleared) devices before registering the new ones. It has to - * do with the fact that rcm_register_interest calls realpath(3C) to - * convert a /dev name to a /devices name. It uses the /devices name - * for storing the device information. - * - * It can happen that between cache_syncs that the administrator - * metaclears one metadevice and metacreates a new one. For example, - * - * metaclear acct - * metainit engr 1 1 c1t12d0s0 - * - * The metaclear operation frees up the minor number that was being - * used by acct. The metainit operation can then reuse the minor - * number. This means that both metadevices would have the same - * /devices name even though they had different /dev names. Since - * rcm_register_interest uses /devices names for storing records, we - * need to unregister acct before registering engr. Otherwise we - * would get an EALREADY errno and a failed registration. This is why - * cache_sync creates a list of devices to be registered after all the - * removed devices have been unregistered. - * - * Input: - * rcm_handle_t *hd rcm handle - * cache_t **cachep pointer to cache - * Return: - * cache_t **cachep pointer to new cache - * Return: None - * Locking: The cache must be locked prior to entry - */ -static void -cache_sync(rcm_handle_t *hd, cache_t **cachep) -{ - char *devicename; - deventry_t *deventry; - cache_t *new_cache; - cache_t *old_cache = *cachep; - deventry_t *hashline = NULL; - deventry_t **register_list = NULL; - deventry_t *register_this; - uint32_t register_count = 0; /* # entrys in register_list */ - uint32_t allocated = 0; /* # entrys allocated in */ - /* register_list */ - uint32_t allocate_incr = 16; - uint32_t i = 0; - - /* Get a new cache */ - if ((new_cache = create_cache()) == NULL) { - rcm_log_message(RCM_WARNING, MSG_NORECACHE); - return; - } - - /* For every entry in the new cache... */ - while ((devicename = cache_walk(new_cache, &i, &hashline)) != NULL) { - register_this = NULL; - - /* Look for this entry in the old cache */ - deventry = cache_lookup(old_cache, devicename); - /* - * If no entry in old cache, register the resource. If there - * is an entry, but it is marked as removed, register it - * again and remove it from the old cache - */ - if (deventry == NULL) { - register_this = hashline; - } else { - if (deventry->flags&REMOVED) - register_this = hashline; - cache_remove(old_cache, deventry); - } - - /* Save this entry if we need to register it later. */ - if (register_this) { - if (register_count >= allocated) { - /* Need to extend our array */ - allocated += allocate_incr; - register_list = - (deventry_t **)realloc(register_list, - allocated * sizeof (*register_list)); - if (register_list == NULL) { - /* Out of memory. Give up. */ - rcm_log_message(RCM_WARNING, - MSG_NORECACHE); - free(new_cache); - return; - } - } - *(register_list + register_count) = register_this; - register_count++; - } - } - - /* - * For every device left in the old cache, just unregister if - * it has not already been removed - */ - i = 0; - hashline = NULL; - while ((devicename = cache_walk(old_cache, &i, &hashline)) != NULL) { - if (!(hashline->flags&REMOVED)) { - (void) svm_unregister_device(hd, hashline); - } - } - - /* Register the new devices. */ - for (i = 0; i < register_count; i++) { - deventry = *(register_list + i); - svm_register_device(hd, deventry->devname); - } - if (register_list) - free(register_list); - - /* Swap pointers */ - *cachep = new_cache; - - /* Destroy old cache */ - free_cache(&old_cache); - - /* Mark the new cache as registered */ - new_cache-> registered = 1; -} - -/* - * cache_walk() - * - * Perform one step of a walk through the cache. The i and hashline - * parameters are updated to store progress of the walk for future steps. - * They must all be initialized for the beginning of the walk - * (i = 0, line = NULL). Initialize variables to these values for these - * parameters, and then pass in the address of each of the variables - * along with the cache. A NULL return value will be given to indicate - * when there are no more cached items to be returned. - * - * Input: - * cache_t *cache cache - * uint32_t *i hash table index of prev entry - * deventry_t **line ptr to previous device entry - * Output: - * uint32_t *i updated hash table index - * deventry_t **line ptr to device entry - * Return: - * char* device name (NULL for end of cache) - * Locking: The cache must be locked prior to calling this routine. - */ -static char * -cache_walk(cache_t *cache, uint32_t *i, deventry_t **line) -{ - uint32_t j; - - /* sanity check */ - if (cache == NULL || i == NULL || line == NULL || - *i >= cache->size) - return (NULL); - - /* if initial values were given, look for the first entry */ - if (*i == 0 && *line == NULL) { - for (j = 0; j < cache->size; j++) { - if (cache->hashline[j]) { - *i = j; - *line = cache->hashline[j]; - return ((*line)->devname); - } - } - } else { - /* otherwise, look for the next entry for this hash value */ - if (*line && (*line)->next) { - *line = (*line)->next; - return ((*line)->devname); - } else { - /* next look further down in the hash table */ - for (j = (*i) + 1; j < cache->size; j++) { - if (cache->hashline[j]) { - *i = j; - *line = cache->hashline[j]; - return ((*line)->devname); - } - } - } - } - - /* - * We would have returned somewhere above if there were any more - * entries. So set the sentinel values and return a NULL. - */ - *i = cache->size; - *line = NULL; - return (NULL); -} - -/* - * free_cache() - * - * Given a pointer to a cache structure, this routine will free all - * of the memory allocated within the cache. - * - * Input: - * cache_t **cache ptr to cache - * Return: None - * Locking: cache lock held on entry - */ -static void -free_cache(cache_t **cache) -{ - uint32_t index; - cache_t *realcache; - - /* sanity check */ - if (cache == NULL || *cache == NULL) - return; - - /* de-reference the cache pointer */ - realcache = *cache; - - /* free the hash table */ - for (index = 0; index < realcache->size; index++) { - free_deventry(&realcache->hashline[index]); - } - free(realcache->hashline); - realcache->hashline = NULL; - - free(realcache); - *cache = NULL; -} - -/* - * free_deventry() - * - * This routine frees all of the memory allocated within a node of a - * deventry. - * - * Input: - * deventry_t **deventry ptr to deventry - * Return: None - * Locking: cache lock held on entry - */ -static void -free_deventry(deventry_t **deventry) -{ - deventry_t *olddeventry; - hspentry_t *hspentry; - hspentry_t *oldhspentry; - hspuser_t *hspuser; - hspuser_t *oldhspuser; - - if (deventry != NULL) { - while (*deventry != NULL) { - olddeventry = (*deventry)->next; - if ((*deventry)->flags&IN_HSP) { - /* - * If this is in a hot spare pool, remove the - * memory allocated to hot spare pools and - * the users of the pool - */ - hspentry = (*deventry)->hsp_list; - while (hspentry) { - oldhspentry = hspentry; - hspuser = hspentry->hspuser; - while (hspuser) { - oldhspuser = hspuser; - free(hspuser->hspusername); - hspuser = hspuser->next; - free(oldhspuser); - } - free(hspentry->hspname); - hspentry = hspentry->next; - free(oldhspentry); - } - } - if ((*deventry)->devicesname) - free((*deventry)->devicesname); - free((*deventry)->devname); - free (*deventry); - *deventry = olddeventry; - } - } -} - -/* - * hash() - * - * A rotating hashing function that converts a string 's' to an index - * in a hash table of size 'h'. - * - * Input: - * uint32_t h hash table size - * char *s string to be hashed - * Return: - * uint32_t hash value - * Locking: None - */ -static uint32_t -hash(uint32_t h, char *s) -{ - - int len; - int hash, i; - - len = strlen(s); - - for (hash = len, i = 0; i < len; ++i) { - hash = (hash<<4)^(hash>>28)^s[i]; - } - return (hash % h); -} - -/* - * svm_register_device() - * - * Register a device - * - * Input: - * rcm_handle_t *hd rcm handle - * char *devname device name - * Return: None - * Locking: None - */ -static void -svm_register_device(rcm_handle_t *hd, char *devname) -{ - /* Sanity check */ - if (devname == NULL) - return; - - rcm_log_message(RCM_TRACE1, "SVM: Registering %s(%d)\n", devname, - devname); - - if (rcm_register_interest(hd, devname, 0, NULL) != RCM_SUCCESS) { - rcm_log_message(RCM_ERROR, - gettext("SVM: failed to register \"%s\"\n"), devname); - } -} - -/* - * add_dep() - * - * Add an entry to an array of dependent names for a device. Used to - * build an array to call the rcm framework with when passing on a - * DR request. - * - * Input: - * int *ndeps ptr to current number of deps - * char ***depsp ptr to current dependent array - * deventry_t *deventry deventry of device to be added - * Output: - * int *ndeps ptr to updated no of deps - * char ***depsp ptr to new dependant array - * Return: - * int 0, of ok, -1 if failed to allocate memory - * Locking: None - */ -static int -add_dep(int *ndeps, char ***depsp, deventry_t *deventry) -{ - char **deps_new; - - *ndeps += 1; - deps_new = realloc(*depsp, ((*ndeps) + 1) * sizeof (char *)); - if (deps_new == NULL) { - rcm_log_message(RCM_ERROR, - gettext("SVM: cannot allocate dependent array (%s).\n"), - strerror(errno)); - return (-1); - } - deps_new[(*ndeps-1)] = deventry->devname; - deps_new[(*ndeps)] = NULL; - *depsp = deps_new; - return (0); -} - - -/* - * get_dependent() - * - * Create a list of all dependents of a device - * Do not add dependent if it is marked as removed - * - * Input: - * deventry_t *deventry device entry - * Output: - * char ***dependentsp pty to dependent list - * Return: - * int 0, if ok, -1 if failed - * Locking: None - */ -static int -get_dependents(deventry_t *deventry, char *** dependentsp) -{ - int ndeps = 0; - deventry_t *dependent; - char **deps = NULL; - - - dependent = deventry->dependent; - if (dependent == NULL) { - *dependentsp = NULL; - return (0); - } - while (dependent != NULL) { - /* - * do not add dependent if we have - * already received a remove notifification - */ - if (!(dependent->flags&REMOVED)) - if (add_dep(&ndeps, &deps, dependent) < 0) - return (-1); - dependent = dependent->next_dep; - } - if (ndeps == 0) { - *dependentsp = NULL; - } else { - *dependentsp = deps; - } - return (0); -} - -/* - * add_to_usage() - * Add string to the usage string pointed at by usagep. Allocate memory - * for the new usage string and free the memory used by the original - * usage string - * - * Input: - * char **usagep ptr to usage string - * char *string string to be added to usage - * Return: - * char ptr to new usage string - * Locking: None - */ -char * -add_to_usage(char ** usagep, char *string) -{ - int len; - char *new_usage = NULL; - - if (*usagep == NULL) { - len = 0; - } else { - len = strlen(*usagep) + 2; /* allow space for comma */ - } - len += strlen(string) + 1; - if (new_usage = calloc(1, len)) { - if (*usagep) { - (void) strcpy(new_usage, *usagep); - free(*usagep); - (void) strcat(new_usage, ", "); - } - (void) strcat(new_usage, string); - } - return (new_usage); -} - -/* - * add_to_usage_fmt() - * - * Add a formatted string , of the form "blah %s" to the usage string - * pointed at by usagep. Allocate memory for the new usage string and free - * the memory used by the original usage string. - * - * Input: - * char **usagep ptr to current usage string - * char *fmt format string - * char *string string to be added - * Return: - * char* new usage string - * Locking: None - */ -/*PRINTFLIKE2*/ -char * -add_to_usage_fmt(char **usagep, char *fmt, char *string) -{ - int len; - char *usage; - char *new_usage = NULL; - - len = strlen(fmt) - + strlen(string) + 1; - if (usage = calloc(1, len)) { - (void) sprintf(usage, fmt, string); - new_usage = add_to_usage(usagep, usage); - free(usage); - } - return (new_usage); -} - -/* - * is_open() - * - * Make ioctl call to find if a device is open - * - * Input: - * dev_t devkey dev_t for device - * Return: - * int 0 if not open, !=0 if open - * Locking: None - */ -static int -is_open(dev_t devkey) -{ - int fd; - md_isopen_t isopen_ioc; - - /* Open admin device */ - if ((fd = open(ADMSPECIAL, O_RDONLY, 0)) < 0) { - rcm_log_message(RCM_ERROR, MSG_OPENERR, ADMSPECIAL); - return (0); - } - - (void) memset(&isopen_ioc, 0, sizeof (isopen_ioc)); - isopen_ioc.dev = devkey; - if (ioctl(fd, MD_IOCISOPEN, &isopen_ioc) < 0) { - (void) close(fd); - return (0); - } - (void) close(fd); - return (isopen_ioc.isopen); -} - -/* - * check_softpart() - * - * Check the status of the passed in device within the softpartition. - * - * Input: - * mdsetname_t * the name of the set - * mdname_t * the softpartition device that is being examined - * char * the device which needs to be checked - * md_error_t * error pointer (not used) - * Return: - * int REDUNDANT - device is redundant and can be - * removed - * NOTREDUNDANT - device cannot be removed - * NOTINDEVICE - device is not part of this - * component - */ -static int -check_softpart(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) -{ - md_sp_t *softp = NULL; - - rcm_log_message(RCM_TRACE1, "SVM: softpart checking %s %s\n", - np->bname, uname); - - softp = meta_get_sp(sp, np, ep); - - /* softp cannot be NULL, if it is then the RCM cache is corrupt */ - assert(softp != NULL); - - /* - * if the softpartition is not a parent then nothing can be done, user - * must close the device and then fix the under lying devices. - */ - if (!(MD_HAS_PARENT(softp->common.parent))) { - rcm_log_message(RCM_TRACE1, - "SVM: softpart is a top level device\n"); - return (NOTREDUNDANT); - } - - if (strcmp(softp->compnamep->bname, uname) != 0) { - /* - * This can occur if this function has been called by the - * check_raid5 code as it is cycling through each column - * in turn. - */ - rcm_log_message(RCM_TRACE1, - "SVM: %s is not in softpart (%s)\n", - uname, softp->compnamep->bname); - return (NOTINDEVICE); - } - - /* - * Check the status of the soft partition this only moves from - * an okay state if the underlying devices fails while the soft - * partition is open. - */ - if (softp->status != MD_SP_OK) { - rcm_log_message(RCM_TRACE1, - "SVM: softpart is broken (state: 0x%x)\n", - softp->status); - return (REDUNDANT); - } - - return (NOTREDUNDANT); -} - -/* - * check_raid5() - * - * Check the status of the passed in device within the raid5 in question. - * - * Input: - * mdsetname_t * the name of the set - * mdname_t * the raid5 device that is being examined - * char * the device which needs to be checked - * md_error_t * error pointer (not used) - * Return: - * int REDUNDANT - device is redundant and can be - * removed - * NOTREDUNDANT - device cannot be removed - */ -static int -check_raid5(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) -{ - md_raid_t *raidp = NULL; - md_raidcol_t *colp = NULL; - int i; - int rval = 0; - - rcm_log_message(RCM_TRACE1, "SVM: raid5 checking %s %s\n", - np->bname, uname); - - raidp = meta_get_raid(sp, np, ep); - - /* raidp cannot be NULL, if it is then the RCM cache is corrupt */ - assert(raidp != NULL); - - /* - * Now check each column in the device. We cannot rely upon the state - * of the device because if a hotspare is in use all the states are - * set to Okay, both at the metadevice layer and the column layer. - */ - for (i = 0; (i < raidp->cols.cols_len); i++) { - colp = &raidp->cols.cols_val[i]; - np = colp->colnamep; - - rcm_log_message(RCM_TRACE1, - "SVM: raid5 checking %s state %s 0x%x\n", - np->bname, raid_col_state_to_name(colp, NULL, 0), - colp->state); - - /* - * It is possible for the column to be a softpartition, - * so need to check the softpartiton if this is the - * case. It is *not* valid for the column to be a - * stripe/concat/mirror, and so no check to see what - * type of metadevice is being used. - */ - if (metaismeta(np)) { - /* this is a metadevice ie a softpartiton */ - rval = check_softpart(sp, np, uname, ep); - if (rval == REDUNDANT) { - rcm_log_message(RCM_TRACE1, - "SVM: raid5 %s is broken\n", uname); - meta_invalidate_name(np); - return (REDUNDANT); - } else if (rval == NOTREDUNDANT && - colp->hsnamep != NULL) { - rcm_log_message(RCM_TRACE1, - "SVM: raid5 device is broken, hotspared\n"); - meta_invalidate_name(np); - return (REDUNDANT); - } - meta_invalidate_name(np); - continue; - } - meta_invalidate_name(np); - - if (strcmp(uname, np->bname) != 0) - continue; - - /* - * Found the device. Check if it is broken or hotspared. - */ - if (colp->state & RUS_ERRED) { - rcm_log_message(RCM_TRACE1, - "SVM: raid5 column device is broken\n"); - return (REDUNDANT); - } - - if (colp->hsnamep != NULL) { - rcm_log_message(RCM_TRACE1, - "SVM: raid5 column device is broken, hotspared\n"); - return (REDUNDANT); - } - } - return (NOTREDUNDANT); -} - -/* - * check_stripe() - * - * Check the status of the passed in device within the stripe in question. - * - * Input: - * mdsetname_t * the name of the set - * mdname_t * the stripe that is being examined - * char * the device which needs to be checked - * md_error_t * error pointer (not used) - * Return: - * int REDUNDANT - device is redundant and can be - * removed - * NOTREDUNDANT - device cannot be removed - * NOTINDEVICE - device is not part of this - * component - */ -static int -check_stripe(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) -{ - md_stripe_t *stripep = NULL; - md_row_t *mrp = NULL; - md_comp_t *mcp; - mdname_t *pnp; - char *miscname; - int row; - int col; - - rcm_log_message(RCM_TRACE1, "SVM: concat/stripe checking %s %s\n", - np->bname, uname); - stripep = meta_get_stripe(sp, np, ep); - - /* stripep cannot be NULL, if it is then the RCM cache is corrupt */ - assert(stripep != NULL); - - /* - * If the stripe is not a parent then nothing can be done, user - * must close the device and then fix the devices. - */ - if (!(MD_HAS_PARENT(stripep->common.parent))) { - rcm_log_message(RCM_TRACE1, - "SVM: stripe is a top level device\n"); - return (NOTREDUNDANT); - } - - pnp = metamnumname(&sp, stripep->common.parent, 0, ep); - - if (pnp == NULL) { - /* - * Only NULL when the replicas are in an inconsistant state - * ie the device says it is the parent of X but X does not - * exist. - */ - rcm_log_message(RCM_TRACE1, "SVM: parent is not configured\n"); - return (NOTREDUNDANT); - } - - /* - * Get the type of the parent and make sure that it is a mirror, - * if it is then need to find out the number of submirrors, and - * if it is not a mirror then this is not a REDUNDANT device. - */ - if ((miscname = metagetmiscname(pnp, ep)) == NULL) { - /* - * Again something is wrong with the configuration. - */ - rcm_log_message(RCM_TRACE1, - "SVM: unable to find the type of %s\n", pnp->cname); - meta_invalidate_name(pnp); - return (NOTREDUNDANT); - } - - if (!(strcmp(miscname, MD_MIRROR) == 0 && - check_mirror(sp, pnp, ep) == REDUNDANT)) { - rcm_log_message(RCM_TRACE1, - "SVM: %s is a %s and not redundant\n", - pnp->cname, miscname); - meta_invalidate_name(pnp); - return (NOTREDUNDANT); - } - - meta_invalidate_name(pnp); - - for (row = 0; row < stripep->rows.rows_len; row++) { - mrp = &stripep->rows.rows_val[row]; - - /* now the components in the row */ - for (col = 0; col < mrp->comps.comps_len; col++) { - mcp = &mrp->comps.comps_val[col]; - - rcm_log_message(RCM_TRACE1, - "SVM: stripe comp %s check\n", - mcp->compnamep->bname); - - if (strcmp(mcp->compnamep->bname, uname) != 0) - continue; - - rcm_log_message(RCM_TRACE1, - "SVM: component state: %s\n", - comp_state_to_name(mcp, NULL, 0)); - - if (mcp->hsnamep != NULL) { - /* device is broken and hotspared */ - rcm_log_message(RCM_TRACE1, - "SVM: stripe %s broken, hotspare active\n", - uname); - return (REDUNDANT); - } - - /* - * LAST_ERRED is a special case. If the state of a - * component is CS_LAST_ERRED then this is the last - * copy of the data and we need to keep using it, even - * though we had errors. Thus, we must block the DR - * request. If you follow the documented procedure for - * fixing each component (fix devs in maintenance - * before last erred) then the mirror will - * automatically transition Last Erred components to - * the Erred state after which they can be DRed out. - */ - if (mcp->state == CS_ERRED) { - /* device is broken */ - rcm_log_message(RCM_TRACE1, - "SVM: stripe %s is broken\n", uname); - return (REDUNDANT); - } - - /* - * Short circuit - if here the component has been - * found in the column so no further processing is - * required here. - */ - return (NOTREDUNDANT); - } - } - - /* - * Only get to this point if the device (uname) has not been - * found in the stripe. This means that there is something - * wrong with the device dependency list. - */ - rcm_log_message(RCM_TRACE1, - "SVM: component %s is not part of %s\n", - uname, np->bname); - - return (NOTINDEVICE); -} - -/* - * check_mirror() - * - * Make sure that the mirror > 1 submirror. - * - * Input: - * mdsetname_t * the name of the set - * mdname_t * the stripe that is being examined - * Return: - * int REDUNDANT - mirror > 1 submirrors - * NOTREDUNDANT - mirror has 1 submirror - */ -static int -check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep) -{ - uint_t nsm = 0; /* number of submirrors */ - uint_t smi = 0; /* index into submirror array */ - md_mirror_t *mirrorp = NULL; - - rcm_log_message(RCM_TRACE1, "SVM: mirror checking %s\n", np->bname); - mirrorp = meta_get_mirror(sp, np, ep); - - /* mirrorp cannot be NULL, if it is then the RCM cache is corrupt */ - assert(mirrorp != NULL); - - /* - * Need to check how many submirrors that the mirror has. - */ - for (smi = 0, nsm = 0; (smi < NMIRROR); ++smi) { - md_submirror_t *mdsp = &mirrorp->submirrors[smi]; - mdname_t *submirnamep = mdsp->submirnamep; - - /* Is this submirror being used ? No, then continue */ - if (submirnamep == NULL) - continue; - nsm++; - } - - /* - * If there is only one submirror then there is no redundancy - * in the configuration and the user needs to take some other - * action before using cfgadm on the device ie close the metadevice. - */ - if (nsm == 1) { - rcm_log_message(RCM_TRACE1, - "SVM: only one submirror unable to allow action\n"); - return (NOTREDUNDANT); - } - - return (REDUNDANT); -} - -/* - * check_device() - * - * Check the current status of the underlying device. - * - * Input: - * deventry_t * the device that is being checked - * Return: - * int REDUNDANT - device is redundant and can be - * removed - * NOTREDUNDANT - device cannot be removed - * Locking: - * None - * - * The check_device code path (the functions called by check_device) use - * libmeta calls directly to determine if the specified device is - * redundant or not. The can lead to conflicts between data cached in - * libmeta and data that is being cached by this rcm module. Since the - * rcm cache is our primary source of information here, we need to make - * sure that we are not getting stale data from the libmeta caches. - * We use meta_invalidate_name throughout this code path to clear the - * cached data in libmeta in order to ensure that we are not using stale data. - */ -static int -check_device(deventry_t *deventry) -{ - mdsetname_t *sp; - md_error_t error = mdnullerror; - char sname[BUFSIZ+1]; - mdname_t *np; - deventry_t *dependent; - int rval = NOTREDUNDANT; - int ret; - - dependent = deventry->dependent; - - rcm_log_message(RCM_TRACE1, "SVM: check_device(%s)\n", - deventry->devname); - /* - * should not be null because the caller has already figured out - * there are dependent devices. - */ - assert(dependent != NULL); - - do { - - rcm_log_message(RCM_TRACE1, "SVM: check dependent: %s\n", - dependent->devname); - - if (dependent->flags & REMOVED) { - dependent = dependent->next_dep; - continue; - } - - /* - * The device *should* be a metadevice and so need to see if - * it contains a setname. - */ - ret = sscanf(dependent->devname, - "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/dsk/", - sname); - - if (ret != 1) - (void) strcpy(sname, MD_LOCAL_NAME); - - if ((sp = metasetname(sname, &error)) == NULL) { - rcm_log_message(RCM_TRACE1, - "SVM: unable to get setname for \"%s\", error %s\n", - sname, mde_sperror(&error, "")); - break; - } - - rcm_log_message(RCM_TRACE1, "SVM: processing: %s\n", - dependent->devname); - - np = metaname(&sp, dependent->devname, META_DEVICE, &error); - - switch (dependent->devtype) { - case SVM_TRANS: - /* - * No code to check trans devices because ufs logging - * should be being used. - */ - rcm_log_message(RCM_TRACE1, - "SVM: Use UFS logging instead of trans devices\n"); - break; - case SVM_SLICE: - case SVM_STRIPE: - case SVM_CONCAT: - rval = check_stripe(sp, np, deventry->devname, &error); - break; - case SVM_MIRROR: - /* - * No check here as this is performed by the one - * above when the submirror is checked. - */ - rcm_log_message(RCM_TRACE1, - "SVM: Mirror check is done by the stripe check\n"); - break; - case SVM_RAID: - /* - * Raid5 devices can be built on soft partitions or - * slices and so the check here is for the raid5 - * device built on top of slices. Note, a raid5 cannot - * be built on a stripe/concat. - */ - rval = check_raid5(sp, np, deventry->devname, &error); - break; - case SVM_SOFTPART: - /* - * Raid5 devices can be built on top of soft partitions - * and so they have to be checked. - */ - rval = check_softpart(sp, np, deventry->devname, - &error); - break; - default: - rcm_log_message(RCM_TRACE1, - "SVM: unknown devtype: %d\n", dependent->devtype); - break; - } - - meta_invalidate_name(np); - - if (rval == REDUNDANT) - break; - } while ((dependent = dependent->next_dep) != NULL); - - rcm_log_message(RCM_TRACE1, "SVM: check_device return %d\n", rval); - return (rval); -} - -/* - * svm_unregister_device - * - * Unregister the device specified by the deventry - * - * Input: - * rcm_handle_t * information for RCM - * deventry_t * description of the device to be - * unregistered - * - * Return: - * int 0 - successfully unregistered - * != 0 - failed to unregister - * - * Locking: - * None - * - * If the deventry_t has a devicesname, we will first attempt to unregister - * using that name. If that fails then we'll attempt to unregister using - * devname. The reason for this strategy has to do with the way that - * rcm_register_interest works. If passed a /dev/ name, - * rcm_register_interest uses realpath() to convert it to a /devices name. - * Thus, we are more likely to succeed if we use devicesname first. - */ - -static int -svm_unregister_device(rcm_handle_t *hd, deventry_t *d) -{ - int deleted; - - if (d->devicesname) { - rcm_log_message(RCM_TRACE1, "SVM: unregister_device %s (%s)\n", - d->devname, d->devicesname); - } else { - rcm_log_message(RCM_TRACE1, "SVM: unregister_device %s\n", - d->devname); - } - deleted = -1; - if (d->devicesname != NULL) { - /* - * Try to unregister via the /devices entry first. RCM - * converts /dev/ entries to /devices entries before - * storing them. Thus, if this item has a /devices name - * available, we should use it for unregistering. - */ - deleted = rcm_unregister_interest(hd, - d->devicesname, 0); - } - if (deleted != 0) { - /* - * Either we did not have a /devices name or the attempt to - * unregister using the /devices name failed. Either way - * we'll now try to unregister using the conventional name. - */ - deleted = rcm_unregister_interest(hd, d->devname, 0); - } - if (deleted != 0) { - rcm_log_message(RCM_TRACE1, "SVM: unregister_device failed " - "for %s\n", d->devname); - } - return (deleted); -} diff --git a/usr/src/cmd/stat/common/dsr.c b/usr/src/cmd/stat/common/dsr.c index 5d6f51f835e6..e9db7e3042f6 100644 --- a/usr/src/cmd/stat/common/dsr.c +++ b/usr/src/cmd/stat/common/dsr.c @@ -18,13 +18,13 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include @@ -71,13 +71,9 @@ typedef struct { static minor_match_t mm_disk = {"a", 1}; static minor_match_t mm_tape = {"", 0}; static minor_match_t mm_misc = {"0", 0}; -static char md_minor_name[MAXPATHLEN]; -static minor_match_t mm_md = {md_minor_name, 0}; -static minor_match_t *mma_disk_tape_misc[] = +static minor_match_t *mma_disk_tape_misc[] = {&mm_disk, &mm_tape, &mm_misc, NULL}; -static minor_match_t *mma_md[] = {&mm_md, NULL}; -static char *mdsetno2name(int setno); -#define DISKLIST_MOD 256 /* ^2 instunit mod hash */ +#define DISKLIST_MOD 256 /* ^2 instance mod hash */ static disk_list_t *disklist[DISKLIST_MOD]; @@ -91,7 +87,7 @@ static char *cur_special(char *, char *); /* * Clear the snapshot so a cache miss in lookup_ks_name() will cause a fresh - * snapshot in drvinstunit2dev(). + * snapshot in drvinstpart2dev(). */ void cleanup_iodevs_snapshot() @@ -110,82 +106,41 @@ cleanup_iodevs_snapshot() } /* - * Find information for (driver, instunit) device: return zero on failure. + * Find information for (driver, instance) device: return zero on failure. * - * NOTE: Failure of drvinstunit2dev works out OK for the caller if the kstat + * NOTE: Failure of drvinstpart2dev works out OK for the caller if the kstat * name is the same as public name: the caller will just use kstat name. */ static int -drvinstunitpart2dev(char *driver, int instunit, char *part, +drvinstpart2dev(char *driver, int instance, char *part, char **devpathp, char **adevpathp, char **devidp) { - int instance; - minor_match_t **mma; - minor_match_t *mm; + minor_match_t *mm, **mma = mma_disk_tape_misc; char *devpath; char *devid; - char *a, *s; - int mdsetno; - char *mdsetname = NULL; - char amdsetname[MAXPATHLEN]; char *devicespath; di_node_t node; /* setup "no result" return values */ - if (devpathp) + if (devpathp != NULL) *devpathp = NULL; - if (adevpathp) + if (adevpathp != NULL) *adevpathp = NULL; - if (devidp) + if (devidp != NULL) *devidp = NULL; - /* take snapshot if not established */ + /* take snapshot if not established */ if (di_dim == NULL) { di_dim = di_dim_init(); if (di_dim == NULL) return (0); } - /* - * Determine if 'instunit' is an 'instance' or 'unit' based on the - * 'driver'. The current code only detects 'md' metadevice 'units', - * and defaults to 'instance' for everything else. - * - * For a metadevice, 'driver' is either "md" or "/md". - */ - s = strstr(driver, "/md"); - if ((strcmp(driver, "md") == 0) || - (s && isdigit(*driver) && (strcmp(s, "/md") == 0))) { - /* - * "md" unit: Special case translation of "md" kstat names. - * For the local set the kstat name is "md", and for - * a shared set the kstat name is "/md": we map - * these to the minor paths "/pseudo/md@0:,blk" and - * "/pseudo/md@0:,,blk" respectively. - */ - if (isdigit(*driver)) { - mdsetno = atoi(driver); - - /* convert setno to setname */ - mdsetname = mdsetno2name(mdsetno); - } else - mdsetno = 0; - - driver = "md"; - instance = 0; - mma = mma_md; /* metadevice dynamic minor */ - (void) snprintf(md_minor_name, sizeof (md_minor_name), - "%d,%d,blk", mdsetno, instunit); - } else { - instance = instunit; - mma = mma_disk_tape_misc; /* disk/tape/misc minors */ - } - - if (part) { + if (part != NULL) { devpath = di_dim_path_dev(di_dim, driver, instance, part); } else { /* Try to find a minor_match that works */ - for (mm = *mma++; mm; mm = *mma++) { + for (mm = *mma++; mm != NULL; mm = *mma++) { if ((devpath = di_dim_path_dev(di_dim, driver, instance, mm->minor_name)) != NULL) break; @@ -198,11 +153,19 @@ drvinstunitpart2dev(char *driver, int instunit, char *part, * At this point we have a devpath result. Return the information about * the result that the caller is asking for. */ - if (devpathp) /* devpath */ + if (devpathp != NULL) /* devpath */ *devpathp = safe_strdup(devpath); - if (adevpathp) { /* abbreviated devpath */ - if ((part == NULL) && mm->minor_isdisk) { + if (adevpathp != NULL) { /* abbreviated devpath */ + char *a; + + a = strrchr(devpath, '/'); + if (a == NULL) { + free(devpath); + return (0); + } + a++; + if (part == NULL && mm->minor_isdisk) { /* * For disk kstats without a partition we return the * last component with trailing "s#" or "p#" stripped @@ -210,60 +173,24 @@ drvinstunitpart2dev(char *driver, int instunit, char *part, * For example for devpath of "/dev/dsk/c0t0d0s0" the * abbreviated devpath would be "c0t0d0". */ - a = strrchr(devpath, '/'); - if (a == NULL) { + char *s; + + if ((s = strrchr(a, 's')) == NULL && + (s = strrchr(a, 'p')) == NULL) { free(devpath); return (0); } - a++; - s = strrchr(a, 's'); - if (s == NULL) { - s = strrchr(a, 'p'); - if (s == NULL) { - free(devpath); - return (0); - } - } /* don't include slice information in devpath */ *s = '\0'; - } else { - /* - * remove "/dev/", and "/dsk/", from 'devpath' (like - * "/dev/md/dsk/d0") to form the abbreviated devpath - * (like "md/d0"). - */ - if ((s = strstr(devpath, "/dev/")) != NULL) - (void) strcpy(s + 1, s + 5); - if ((s = strstr(devpath, "/dsk/")) != NULL) - (void) strcpy(s + 1, s + 5); - - /* - * If we have an mdsetname, convert abbreviated setno - * notation (like "md/shared/1/d0" to abbreviated - * setname notation (like "md/red/d0"). - */ - if (mdsetname) { - a = strrchr(devpath, '/'); - (void) snprintf(amdsetname, sizeof (amdsetname), - "md/%s%s", mdsetname, a); - free(mdsetname); - a = amdsetname; - } else { - if (*devpath == '/') - a = devpath + 1; - else - a = devpath; - } } *adevpathp = safe_strdup(a); } - if (devidp) { /* lookup the devid */ + if (devidp != NULL) { /* lookup the devid */ /* take snapshot if not established */ - if (di_root == DI_NODE_NIL) { + if (di_root == DI_NODE_NIL) di_root = di_init("/", DINFOCACHE); - } - if (di_root) { + if (di_root != NULL) { /* get path to /devices devinfo node */ devicespath = di_dim_path_devices(di_dim, driver, instance, NULL); @@ -322,19 +249,16 @@ drvpid2port(uint_t pid, char **target_portp) } /* - * Find/create a disk_list entry for given a kstat name. + * Find/create a disk_list entry for the given kstat name. * The basic format of a kstat name is * - * "..,". + * "[..][,]". * - * The is a decimal number. The "..", + * The is a decimal number. The "..", * which describes mpxio path stat information, and "," parts are * optional. The consists of the letter 't' followed by a decimal number. * When available, we use the to find the 'target-port' via ioctls to * the scsi_vhci driver. - * - * NOTE: In the case of non-local metadevices, the format of "" in - * a kstat name is acutally "/md". */ disk_list_t * lookup_ks_name(char *ks_name, int want_devid) @@ -345,7 +269,7 @@ lookup_ks_name(char *ks_name, int want_devid) char *p; int len; char driver[KSTAT_STRLEN]; - int instunit; + int instance; disk_list_t **dlhp; /* disklist head */ disk_list_t *entry; char *devpath = NULL; @@ -355,27 +279,27 @@ lookup_ks_name(char *ks_name, int want_devid) char *target_port = NULL; char portform[MAXPATHLEN]; - /* Filter out illegal forms (like all digits). */ - if ((ks_name == NULL) || (*ks_name == 0) || - (strspn(ks_name, "0123456789") == strlen(ks_name))) + /* Filter out illegal forms (like all digits) */ + if (ks_name == NULL || *ks_name == '\0' || + strspn(ks_name, "0123456789") == strlen(ks_name)) goto fail; /* parse ks_name to create new entry */ pidp = strchr(ks_name, '.'); /* start of "." */ initiator = strrchr(ks_name, '.'); /* start of "." */ - if (pidp && (pidp == initiator)) /* can't have same start */ + if (pidp != NULL && pidp == initiator) /* can't have same start */ goto fail; part = strchr(ks_name, ','); /* start of "," */ - p = strchr(ks_name, ':'); /* start of ":" */ - if (part && p) + p = strchr(ks_name, ':'); /* start of ":" */ + if (part != NULL && p != NULL) goto fail; /* can't have both */ - if (p) + if (p != NULL) part = p; - if (part && pidp) + if (part != NULL && pidp != NULL) goto fail; /* and partition: bad */ - p = part ? part : pidp; + p = (part != NULL) ? part : pidp; if (p == NULL) p = &ks_name[strlen(ks_name) - 1]; /* last char */ else @@ -383,38 +307,36 @@ lookup_ks_name(char *ks_name, int want_devid) while ((p >= ks_name) && isdigit(*p)) p--; /* backwards over digits */ - p++; /* start of instunit */ + p++; /* start of instance */ if ((*p == '\0') || (*p == ',') || (*p == '.') || (*p == ':')) - goto fail; /* no */ + goto fail; /* no */ len = p - ks_name; (void) strncpy(driver, ks_name, len); driver[len] = '\0'; - instunit = atoi(p); - if (part) + instance = atoi(p); + if (part != NULL) part++; /* skip ',' */ - /* hash by instunit and search for existing entry */ - dlhp = &disklist[instunit & (DISKLIST_MOD - 1)]; + /* hash by instance and search for existing entry */ + dlhp = &disklist[instance & (DISKLIST_MOD - 1)]; for (entry = *dlhp; entry; entry = entry->next) { - if (strcmp(entry->ks_name, ks_name) == 0) { + if (strcmp(entry->ks_name, ks_name) == 0) return (entry); - } } /* not found, translate kstat_name components and create new entry */ /* translate kstat_name dev information */ - if (drvinstunitpart2dev(driver, instunit, part, - &devpath, &adevpath, want_devid ? &devid : NULL) == 0) { + if (drvinstpart2dev(driver, instance, part, + &devpath, &adevpath, want_devid ? &devid : NULL) == 0) goto fail; - } /* parse and translate path information */ - if (pidp) { + if (pidp != NULL) { /* parse path information: ".t#." */ pidp++; /* skip '.' */ initiator++; /* skip '.' */ - if ((*pidp != 't') || !isdigit(pidp[1])) + if (*pidp != 't' || !isdigit(pidp[1])) goto fail; /* not ".t#" */ pid = atoi(&pidp[1]); @@ -457,56 +379,6 @@ lookup_ks_name(char *ks_name, int want_devid) return (NULL); } -/* - * Convert metadevice setno to setname by looking in /dev/md for symlinks - * that point to "shared/setno" - the name of such a symlink is the setname. - * The caller is responsible for freeing the returned string. - */ -static char * -mdsetno2name(int setno) -{ - char setlink[MAXPATHLEN + 1]; - char link[MAXPATHLEN + 1]; - char path[MAXPATHLEN + 1]; - char *p; - DIR *dirp; - struct dirent *dp; - size_t len; - char *mdsetname = NULL; - - /* we are looking for a link to setlink */ - (void) snprintf(setlink, MAXPATHLEN, "shared/%d", setno); - - /* in the directory /dev/md */ - (void) strcpy(path, "/dev/md/"); - p = path + strlen(path); - dirp = opendir(path); - if (dirp == NULL) - return (NULL); - - /* loop through /dev/md directory entries */ - while ((dp = readdir(dirp)) != NULL) { - - /* doing a readlink of entry (fails for non-symlinks) */ - *p = '\0'; - (void) strcpy(p, dp->d_name); - if ((len = readlink(path, link, MAXPATHLEN)) == (size_t)-1) - continue; - - /* and looking for a link to setlink */ - link[len] = '\0'; - if (strcmp(setlink, link)) - continue; - - /* found- name of link is the setname */ - mdsetname = safe_strdup(dp->d_name); - break; - } - - (void) closedir(dirp); - return (mdsetname); -} - char * lookup_nfs_name(char *ks, kstat_ctl_t *kc) { diff --git a/usr/src/cmd/stmsboot/mpxio-upgrade b/usr/src/cmd/stmsboot/mpxio-upgrade index a959c554edd9..48df22f49511 100644 --- a/usr/src/cmd/stmsboot/mpxio-upgrade +++ b/usr/src/cmd/stmsboot/mpxio-upgrade @@ -19,9 +19,10 @@ # # CDDL HEADER END # + # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright 2016 Nexenta Systems, Inc. # . /lib/svc/share/fs_include.sh @@ -35,7 +36,6 @@ BOOTDEVICES=$SAVEDIR/boot-devices RECOVERFILE=$SAVEDIR/recover_instructions DEVFSADM=/usr/sbin/devfsadm DUMPADM=/usr/sbin/dumpadm -METADEVADM=/usr/sbin/metadevadm ISROOTDEV="" ISROOTDEVPATH="" usrmounted=0 @@ -98,7 +98,7 @@ mpxio_mount_root() fi ISPHYS=`echo $special |$AWK '/^\/dev\/dsk/ {print}'`; if [ -z "$ISPHYS" ]; then - # a metadevice, either /dev/md or /dev/vx + # a metadevice, /dev/vx new_special=$special $MOUNT -o remount,rw $new_special / >/dev/msglog 2>&1 else @@ -163,9 +163,6 @@ mpxio_mount_usr() ret_val=0 if [ -n "$mountp" ]; then case "$special" in - /dev/md/*) - new_special=$special - ;; /dev/vx/*) new_special=$special ;; @@ -345,13 +342,7 @@ mpxio_main() cecho "" cecho "stmsboot: vfstab has been updated" - if update_dumpconf; then - # update svm configuration to reflect new names - if [ -s /kernel/drv/md.conf ] && \ - [ -x $METADEVADM ]; then - $METADEVADM -r >/dev/msglog 2>&1 - fi - fi + update_dumpconf MACH=`$UNAME -p` if [ "$MACH" = "i386" ]; then diff --git a/usr/src/cmd/stmsboot/mpxio-upgrade.xml b/usr/src/cmd/stmsboot/mpxio-upgrade.xml index df1ae89e5ebf..d3c8fac461ff 100644 --- a/usr/src/cmd/stmsboot/mpxio-upgrade.xml +++ b/usr/src/cmd/stmsboot/mpxio-upgrade.xml @@ -23,8 +23,6 @@ CDDL HEADER END - ident "%Z%%M% %I% %E% SMI" - NOTE: This service manifest is not editable; its contents will be overwritten by package or patch operations, including operating system upgrade. Make customizations in a different @@ -44,14 +42,6 @@ - - - - &2 " This must be rectified before $0 can continue. -If / or /usr are on SVM (md(7d)) partitions, first run - /lib/svc/method/svc-metainit - To properly mount / and /usr, run: /lib/svc/method/fs-root then diff --git a/usr/src/cmd/svc/milestone/fs-usr b/usr/src/cmd/svc/milestone/fs-usr index cf8ce047feaf..b80e95c1f70f 100644 --- a/usr/src/cmd/svc/milestone/fs-usr +++ b/usr/src/cmd/svc/milestone/fs-usr @@ -19,13 +19,14 @@ # # CDDL HEADER END # + # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# # Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. # All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright 2016 Nexenta Systems, Inc. # + . /lib/svc/share/smf_include.sh . /lib/svc/share/fs_include.sh @@ -94,14 +95,6 @@ if smf_is_globalzone && [ $rootiszfs = 0 ]; then [ -n "$otherops" ] && mntopts="${mntopts},${otherops}" [ "$fstype" = nfs ] && mntopts="${mntopts},llock" - # if root dev is a read-only metadevice then fail - case $special in - /dev/md/dsk/*) - dd if=/dev/null of=$special count=0 >/dev/null 2>&1 || - exit $SMF_EXIT_ERR_FATAL - ;; - esac - mountfs -m $mountp $fstype $mntopts - || exit $SMF_EXIT_ERR_FATAL fi @@ -122,14 +115,6 @@ if [ "$rootiszfs" = 0 ] ; then mntopts="remount" fi - # if usr dev is a read-only metadevice then fail - case $special in - /dev/md/dsk/*) - dd if=/dev/null of=$special count=0 \ - >/dev/null 2>&1 || exit $SMF_EXIT_ERR_FATAL - ;; - esac - mountfs - /usr $fstype $mntopts - || exit $SMF_EXIT_ERR_FATAL fi diff --git a/usr/src/cmd/svc/profile/generic_limited_net.xml b/usr/src/cmd/svc/profile/generic_limited_net.xml index 61e31a3b70c0..44814441e6c7 100644 --- a/usr/src/cmd/svc/profile/generic_limited_net.xml +++ b/usr/src/cmd/svc/profile/generic_limited_net.xml @@ -286,15 +286,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - -hsp name="hsp0"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -slice name="/dev/dsk/c0t0d3s7"/> - - - - - -.fi -.in -2 - -.SH FILES -.sp -.ne 2 -.na -\fB\fB/usr/share/lib/xml/dtd/volume-config.dtd\fR\fR -.ad -.sp .6 -.RS 4n - -.RE - -.SH SEE ALSO -.sp -.LP -\fBmetassist\fR(1M), \fBmetaclear\fR(1M), \fBmetadb\fR(1M), -\fBmetadetach\fR(1M), \fBmetahs\fR(1M), \fBmetainit\fR(1M), -\fBmetaoffline\fR(1M), \fBmetaonline\fR(1M), \fBmetaparam\fR(1M), -\fBmetarecover\fR(1M), \fBmetareplace\fR(1M), \fBmetaroot\fR(1M), -\fBmetaset\fR(1M), \fBmetasync\fR(1M), \fBmetattach\fR(1M), -\fBmount_ufs\fR(1M), \fBmddb.cf\fR(4) -.sp -.LP -\fISolaris Volume Manager Administration Guide\fR diff --git a/usr/src/man/man4/volume-request.4 b/usr/src/man/man4/volume-request.4 deleted file mode 100644 index bb8f53c7f09d..000000000000 --- a/usr/src/man/man4/volume-request.4 +++ /dev/null @@ -1,474 +0,0 @@ -'\" te -.\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. -.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH VOLUME-REQUEST 4 "April 9, 2016" -.SH NAME -volume-request, volume-defaults \- Solaris Volume Manager configuration -information for top down volume creation with metassist -.SH SYNOPSIS -.LP -.nf -\fB/usr/share/lib/xml/dtd/volume-request.dtd\fR -.fi - -.LP -.nf -\fB/usr/share/lib/xml/dtd/volume-defaults.dtd\fR -.fi - -.LP -.nf -\fB/etc/defaults/metassist.xml\fR -.fi - -.SH DESCRIPTION -.LP -A volume request file, XML-based and compliant with the -\fBvolume-request.dtd\fR Document Type Definition, describes the -characteristics of the volumes that \fBmetassist\fR should produce. -.sp -.LP -A system administrator would use the volume request file instead of providing -options at the command line to give more specific instructions about the -characteristics of the volumes to create. A volume request file can request -more than one volume, but all requested volumes must reside in the same disk -set. -.sp -.LP -If you start \fBmetassist\fR by providing a volume-request file as input, -\fBmetassist\fR can implement the configuration specified in the file, can -generate a command file that sets up the configuraiton for you to inspect or -edit, or can generate a volume configuration file for you to inspect or edit. -.sp -.LP -As a system administrator, you would want to create a volume request file if -you need to reuse configurations (and do not want to reenter the same command -arguments), or if you prefer to use a configuration file to specify volume -characteristics. -.sp -.LP -Volume request files must be valid XML that complies with the document type -definition in the volume-request.dtd file, located at -\fB/usr/share/lib/xml/dtd/volume-request.dtd\fR. You create a volume request -file, and provide it as input to metassist to create volumes from the top down. -.SS "Defining Volume Request" -.LP -The top level element \fB\fR surrounds the volume request data. -This element has no attributes. A volume request requires at least one - element, which must be the first element after -\fB\fR\&. -.sp -.LP -Optionally, the \fB\fR element can include one or more -\fB\fR and \fB\fR elements to specify which controllers -or disks associated with a specific controller can or cannot be used to create -the volume. -.sp -.LP -Optionally, the \fB\fR element can include a \fB\fR -element to specify characteristics of a hot spare pool if fault recovery is -used. -.sp -.LP -If not specified for a volume with fault-recovery, the first hot spare pool -found in the disk set is used. If no hot spare pool exists but one is required, -a hot spare pool is created. -.sp -.LP -Optionally, the volume-request can include one or more \fB\fR, -\fB\fR, \fB\fR, \fB\fR elements to specify volumes to -create. -.SS "Defining Disk Set" -.LP -Within the \fB\fR element, a \fB\fR element must -exist. The \fB\fR element, with the name attribute, specifies the name -of the disk set to be used. If this disk set does not exist, it is created. -This element and the name attribute are required. -.SS "Defining Availability" -.LP -Within the \fB\fR element and within other elements, you can -specify available or unavailable components (disks, or disks on a specific -controller path) for use or exclusion from use in a volume or hot spare pool. -.sp -.LP -The \fB\fR and \fB\fR elements require a name attribute -which specifies either a full \fBctd\fR name, or a partial \fBctd\fR name that -is used with the implied wildcard to complete the expression. For example, -specifying \fBc3t2d\fR0 as available would look like: -.sp -.in +2 -.nf - -.fi -.in -2 - -.sp -.LP -The \fB\fR element also makes any unnamed components unavailable. -Specifying all controllers except \fBc1\fR unavailable would look like: -.sp -.in +2 -.nf - -.fi -.in -2 - -.sp -.LP -Specifying all disks on controller 2 as unavailable would look like: -.sp -.in +2 -.nf - -.fi -.in -2 - -.sp -.LP -The \fB\fR element can also be used to further restrict the list -of available components. For example, specifying all controllers except \fBc1\fR -unavailable, and making all devices associated with c1t2 unavailable as well -would look like this: -.sp -.in +2 -.nf - - -.fi -.in -2 - -.sp -.LP -Components specified as available must be either part of the named disk set -used for this volume creation, or must be unused and not in any disk set. If -the components are selected for use, but are not in the specified diskset, the -\fBmetassist\fR command automatically adds them to the diskset. -.sp -.LP -It is unnecessary to specify components that are in other disk sets as -unavailable. \fBmetassist\fR automatically excludes them from consideration. -However, unused components or components that are not obviously used (for -example, an unmounted slice that is reserved for different uses) must be -explicitly specified as unavailable, or the \fBmetassist\fR command can include -them in the configuration. -.SS "Defining Hot Spare Pool" -.LP -The next element within the element, after the \fB\fR -and, optionally, \fB\fR and \fB\fR elements, is the -\fB\fR element. Its sole attribute specifies the name of the hot spare -pool: -.sp -.in +2 -.nf - -.fi -.in -2 - -.sp -.LP -The hot spare pool names must start with \fBhsp\fR and conclude with a number, -thus following the existing Solaris Volume Manager hot spare pool naming -requirements. -.sp -.LP -Within the \fB\fR element, you can specify one or more \fB\fR -and \fB\fR elements to specify which disks, or disks associated -with a specific controller can or cannot be used to create the hot spares -within the pool. -.sp -.LP -Also within the \fB\fR element, you can use the \fB\fR element to -specify hot spares to be included in the hot spare pool (see \fBDEFINING -SLICE\fR). Depending on the requirements placed on the hot spare pool by other -parts of the volume request, additional slices can be added to the hot spare -pool. -.SS "Defining Slice" -.LP -The \fB\fR element is used to define slices to include or exclude within -other elements. It requires only a name attribute to specify the ctd name of -the slice, and the context of the \fB\fR element determines the function -of the element. Sample slice elements might look like: -.sp -.in +2 -.nf - - -.fi -.in -2 - -.SS "Defining Stripe" -.LP -The \fB\fR element defines stripes (interlaced RAID 0 volumes) to be -used in a volume. It can contain either slice elements (to explicitly determine -which slices are used), or appropriate combinations of available and -unavailable elements if the specific determination of slices is to be left to -the metassist command. -.sp -.LP -The \fB\fR element takes an optional name attribute to specify a name. -If the name is not specified, an available name is automatically selected from -available Solaris Volume Manager names. If possible, names for related -components are related. -.sp -.LP -The \fB\fR element takes an optional size attribute that specifies the -size as value and units (for example, 10TB, 5GB). If slices for the -\fB\fR are explicitly specified, the size attribute is ignored. The -\fB\fR and \fB\fR elements can be used to constrain -slices for use in a stripe. -.sp -.LP -The \fB\fR elements takes optional \fBmincomp\fR and \fBmaxcomp\fR -attributes to specify both the minimum and maximum number of components that -can be included in it. As with size, if slices for the \fB\fR are -explicitly specified, the \fBmincomp\fR and \fBmaxcomp\fR attributes are -ignored. -.sp -.LP -The \fB\fR elements takes an optional interlace attribute as value and -units (for example, \fB16KB, 5BLOCKS, 20KB\fR). If this value is not specified, -the Solaris Volume Manager default value is used. -.sp -.LP -The \fB\fR element takes an optional usehsp attribute to specify if a -hot spare pool should be associated with this component. This attribute is -specified as a boolean value, as \fBusehsp="TRUE"\fR. If the component is not a -submirror, this attribute is ignored. -.SS "Defining Concat" -.LP -The \fB\fR element defines concats (non-interlaced RAID 0 volumes) to -be used in a configuration. It is specified in the same way as a \fB\fR -element, except that the \fBmincomp\fR, \fBmaxcomp\fR, and interlace attributes -are not valid. -.SS "Defining Mirror" -.LP -The \fB\fR element defines mirrors (RAID 1 volumes) to be used in a -volume configuration. It can contain combinations of \fB\fR and -\fB\fR elements (to explicitly determine which volumes are used as -submirrors). Alternatively, it can have a size attribute specified, along with -the appropriate combinations of available and unavailable elements to leave the -specific determination of components to the \fBmetassist\fR command. -.sp -.LP -The \fB\fR element takes an optional name attribute to specify a name. -If the name is not specified, an available name is automatically selected. -.sp -.LP -The \fB\fR element takes an optional size attribute that specifies the -size as value and units (for example, 10TB, 5GB). If \fB\fR and -\fB\fR elements for the mirror are not specified, this attribute is -required. Otherwise, it is ignored. -.sp -.LP -The \fB\fR element takes an optional nsubmirrors attribute to define -the number of submirrors (1-4) to include. Like the size attribute, this -attribute is ignored if the underlying \fB\fR and \fB -submirrors are explicitly specified. The \fB\fR element takes an -optional read attribute to define the mirror read options (\fBROUNDROBIN\fR, -\fBGEOMETRIC\fR, or \fBFIRST\fR) for the mirror. If this attribute is not -specified, the Solaris Volume Manager default value is used. -.sp -.LP -The \fB\fR element takes an optional write attribute to define the -mirror write options (\fBPARALLEL\fR, \fBSERIAL\fR, or \fBFIRST\fR) for the -mirror. If this attribute is not specified, the Solaris Volume Manager default -value is used. -.sp -.LP -The \fB\fR element takes an optional usehsp attribute to specify if a -hot spare pool should be associated with each submirror. This attribute is -specified as a boolean value, as \fBusehsp="TRUE"\fR. If the \fBusehsp\fR -attribute is specified in the configuration of the \fB\fR or -\fB\fR element used as a submirror, it overrides the value of -\fBusehsp\fR attributes for the mirror as a whole. -.SS "Defining Volume by Quality of Service" -.LP -The \fB\fR element defines volumes (high-level) by the quality of -service they should provide. (The \fB\fR element offers the same -functionality that options on the metassist command line can provide.) -.sp -.LP -The \fB\fR element can contain combinations of \fB\fR and -\fB\fR elements to determine which components can be included in -the configuration. -.sp -.LP -The \fB\fR element takes an optional name attribute to specify a name. -If the name is not specified, an available name is automatically selected. -.sp -.LP -The \fB\fR element takes a required size attribute that specifies the -size as value and units (for example, 10TB, 5GB). -.sp -.LP -The \fB\fR element takes an optional redundancy attribute to define the -number of additional copies of data (1-4) to include. In a worst-case scenario, -a volume can suffer failure of \fIn\fR\fB-1\fR components without data loss, -where \fBredundancy=\fR\fIn\fR. With fault recovery options, the volume could -withstand up to \fIn\fR\fB+hsps-1\fR non-concurrent failures without data loss. -Specifying \fBredundancy=0\fR results in a RAID 0 volume being created (a -stripe, specifically). -.sp -.LP -The \fB\fR element takes an optional faultrecovery attribute to -determine if additional components should be allocated to recover from -component failures in the volume. This is used to determine whether the volume -is associated with a hot spare pool. The faultrecovery attribute is a boolean -attribute, with a default value of \fBFALSE\fR. -.sp -.LP -The \fB\fR element takes an optional datapaths attribute to determine -if multiple data paths should be required to access the volume. The datapaths -attribute should be set to a numeric value. -.SS "Defining Default Values Globally" -.LP -Global defaults can be set in \fB/etc/default/metassist.xml\fR. This -volume-defaults file can contain most of the same elements as a volume-request -file, but differs structurally from a volume-request file: -.RS +4 -.TP -.ie t \(bu -.el o -The container element must be \fB\fR, not -\fB\fR\&. -.RE -.RS +4 -.TP -.ie t \(bu -.el o -The \fB\fR element can contain \fB\fR, -\fB\fR, \fB\fR, \fB\fR, \fB\fR, -\fB\fR, or \fB\fR elements. -.sp -Attributes specified by these elements define global default values, unless -overridden by the corresponding attributes and elements in a volume-request. -None of these elements is a container element. -.RE -.RS +4 -.TP -.ie t \(bu -.el o -The \fB\fR element can contain one or more \fB\fR -elements to provide disk set-specific defaults. The \fB\fR element can -contain \fB\fR, \fB\fR, \fB\fR, \fB\fR, -\fB\fR, \fB\fR, or \fB\fR elements. -.RE -.RS +4 -.TP -.ie t \(bu -.el o -Settings specified outside of a \fB\fR element apply to all disk sets, -but can be overridden within each \fB\fR element. -.RE -.SH EXAMPLES -.LP -\fBExample 1 \fRCreating a Redundant Volume -.sp -.LP -The following example shows a volume request file used to create a redundant -and fault tolerant volume of 1TB. - -.sp -.in +2 -.nf - - - - - - - - -.fi -.in -2 - -.LP -\fBExample 2 \fRCreating a Complex Configuration -.sp -.LP -The following example shows a sample volume-request file that specifies a disk -set name, and specifically itemizes characteristics of components to create. - -.sp -.in +2 -.nf - - - - - - - - - - - - - - - - - - -.fi -.in -2 - -.SH BOUNDARY VALUES -.in +2 -.nf -Attribute Minimum Maximum -mincomp 1 N/A -maxcomp N/A 32 -nsubmirrors 1 4 -passnum 0 9 -datapaths 1 4 -redundancy 0 4 -.fi -.in -2 -.sp - -.SH FILES -.ne 2 -.na -\fB\fB/usr/share/lib/xml/dtd/volume-request.dtd\fR\fR -.ad -.sp .6 -.RS 4n - -.RE - -.sp -.ne 2 -.na -\fB\fB/usr/share/lib/xml/dtd/volume-defaults.dtd\fR\fR -.ad -.sp .6 -.RS 4n - -.RE - -.sp -.ne 2 -.na -\fB\fB/etc/defaults/metassist.xml\fR\fR -.ad -.sp .6 -.RS 4n - -.RE - -.SH SEE ALSO -.LP -\fBmetassist\fR(1M), \fBmetaclear\fR(1M), \fBmetadb\fR(1M), -\fBmetadetach\fR(1M), \fBmetahs\fR(1M), \fBmetainit\fR(1M), -\fBmetaoffline\fR(1M), \fBmetaonline\fR(1M), \fBmetaparam\fR(1M), -\fBmetarecover\fR(1M), \fBmetareplace\fR(1M), \fBmetaroot\fR(1M), -\fBmetaset\fR(1M), \fBmetasync\fR(1M), \fBmetattach\fR(1M), -\fBmount_ufs\fR(1M), \fBmddb.cf\fR(4) -.sp -.LP -\fISolaris Volume Manager Administration Guide\fR diff --git a/usr/src/man/man5/filesystem.5 b/usr/src/man/man5/filesystem.5 index 5bbe8bc08db9..f09946b394d3 100644 --- a/usr/src/man/man5/filesystem.5 +++ b/usr/src/man/man5/filesystem.5 @@ -1,10 +1,25 @@ -'\" te -.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved. +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" .\" Copyright 1989 AT&T -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH FILESYSTEM 5 "Aug 26, 2013" +.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2016 Nexenta Systems, Inc. +.\" +.TH FILESYSTEM 5 "Oct 7, 2016" .SH NAME filesystem \- File system organization .SH SYNOPSIS @@ -179,16 +194,6 @@ Frame buffer device files. File descriptors. .RE -.sp -.ne 2 -.na -\fB\fB/dev/md\fR\fR -.ad -.sp .6 -.RS 4n -Logical volume management meta-disk devices. -.RE - .sp .ne 2 .na @@ -638,16 +643,6 @@ Logical link control (\fBllc2\fR) driver configuration files. Configuration information for the printer subsystem. .RE -.sp -.ne 2 -.na -\fB\fB/etc/lvm\fR\fR -.ad -.sp .6 -.RS 4n -Solaris Logical Volume Manager configuration files. -.RE - .sp .ne 2 .na diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile index b8a0301748f1..d8b246540020 100644 --- a/usr/src/man/man7d/Makefile +++ b/usr/src/man/man7d/Makefile @@ -11,10 +11,10 @@ # # Copyright 2011, Richard Lowe -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Garrett D'Amore # Copyright 2016 Joyent, Inc. # Copyright 2016 Hans Rosenfeld +# Copyright 2016 Nexenta Systems, Inc. # include $(SRC)/Makefile.master @@ -81,8 +81,6 @@ _MANFILES= aac.7d \ lockstat.7d \ lofi.7d \ log.7d \ - md.7d \ - mediator.7d \ mem.7d \ mpt_sas.7d \ mr_sas.7d \ diff --git a/usr/src/man/man7d/md.7d b/usr/src/man/man7d/md.7d deleted file mode 100644 index ecf4977ed440..000000000000 --- a/usr/src/man/man7d/md.7d +++ /dev/null @@ -1,417 +0,0 @@ -'\" te -.\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. -.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH MD 7D "Aug 29, 2003" -.SH NAME -md \- user configurable pseudo device driver -.SH DESCRIPTION -.sp -.LP -\fBmd\fR is a user configurable pseudo device driver that provides disk -concatenation, striping, mirroring, RAID5 metadevices, trans metadevices, and -hot spare utilities. Trans devices are no longer supported and have been -replaced by UFS logging. See \fBmount_ufs\fR(1M). -.sp -.LP -The block devices access the disk using the system's normal buffering mechanism -and are read and written without regard to physical disk records. There is also -a ``raw'' device which provides for direct transmission between the disk and -the user's read or write buffer. A single read or write call usually results in -one I/O operation; raw I/O is therefore considerably more efficient when many -bytes are transmitted. The names of the block devices are found in -\fB/dev/md/dsk\fR; the names of the raw devices are found in -\fB/dev/md/rdsk\fR. Metadevices have the appearance of whole disks; there are -no slices (partitions). -.sp -.LP -I/O requests (such as \fBlseek\fR(2)) to the metadevices must have an offset -that is a multiple of 512 bytes (DEV_BSIZE), or the driver returns an EINVAL -error. If the transfer length is not a multiple of 512 bytes, the tranfer count -is rounded up by the driver. -.sp -.LP -The \fBmd\fR pseudo device drivers support all disk devices on all Solaris 2.4 -or later Solaris systems. -.SH IOCTLS -.sp -.LP -This section provides a list of the ioctls supported by the metadisk driver. -.sp -.LP -The following ioctls are valid when issued to the raw metadevice, such as -\fB/dev/md/rdsk/d0\fR. See \fBdkio\fR(7I) for additional information. -.sp -.ne 2 -.na -\fB\fBDKIOCGGEOM\fR\fR -.ad -.RS 14n -This ioctl is used to get the disk geometry. The metadisk driver fills in the -\fBdkg_nhead\fR, \fBdkg_nsect\fR, \fBdkg_rpm\fR, \fBdkg_write_reinstruct\fR and -\fBdkg_read_reinstruct\fR from the first component of the metadevice (at -\fBmetainit\fR time). \fBdkg_ncyl\fR is calculated using the size of the -metadevice (reported by \fBmetastat\fR) divided by (dkg_nhead * dkg_nsect). The -total size is always a multiple of (dkg_nhead * dkg_nsect). If the first -component of a metadevice \fIdoes not\fR start on cylinder number 0, then the -dkg_ncyl is increased by one cylinder; because DKIOCGVTOC reports the -metadevice as starting on cylinder 1. The side effect here is that it looks -like cylinder 0 is not being used, but all the arithmetic works out correctly. -If the metadevice is not set up, then ENXIO is returned. -.RE - -.sp -.ne 2 -.na -\fB\fBDKIOCINFO\fR\fR -.ad -.RS 14n -When issued to the administrative device or metadevice, this ioctl sets -\fBdki_unit\fR to the unit number of the metadevice, \fBdki_ctype\fR to a value -of DKC_MD, and \fBdki_partition\fR to 0, because there are no slices. -.RE - -.sp -.ne 2 -.na -\fB\fBDKIOCGVTOC\fR\fR -.ad -.RS 14n -This ioctl returns the current vtoc. If one has not been written, then a -default vtoc is returned. \fBv_nparts\fR is always 1. \fBv_part[0].p_start\fR -is 0 if the first component of the metadevice starts on cylinder 0. Otherwise, -the \fBp_start\fR field is the starting sector of cylinder 1. -\fBv_part[0].p_size\fR is the same as the total size reported by -\fBmetastat\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBDKIOCSVTOC\fR\fR -.ad -.RS 14n -This ioctl stores the vtoc in the metadevice state database so it is persistent -across reboots. -.RE - -.SH DIAGNOSTICS -.SS "Notice Log Messages" -.sp -.LP -The informative log messages include: -.sp -.in +2 -.nf -md: d\fInum\fR: Hotspared device \fIdev\fR with \fIdev\fR -.fi -.in -2 - -.sp -.LP -The first device name listed has been hot spare replaced with the second device -name listed. -.sp -.in +2 -.nf -md: d\fInum\fR: Hotspared device dev(\fInum\fR,\fInum\fR) with dev(\fInum\fR,\fInum\fR) -.fi -.in -2 - -.sp -.LP -The first device number listed has been hot spare replaced with the second -device number listed. -.sp -.in +2 -.nf -md: Could not load misc /\fIdev\fR -.fi -.in -2 - -.sp -.LP -The named \fBmisc\fR module is not loadable. It is possibly missing, or -something else has been copied over it. -.sp -.in +2 -.nf -md: d\fInum\fR: no mem for property \fIdev\fR -.fi -.in -2 - -.sp -.LP -Memory could not be allocated in the \fBprop_op\fR entry point. -.sp -.in +2 -.nf -md: db: Parsing error on '\fIdev\fR' - -.fi -.in -2 -.sp - -.sp -.LP -Set command in \fB/kernel/drv/md.conf\fR for the \fBmddb.bootlist\fR -\fI\fR is not in the correct format. \fBmetadb\fR \fB-p\fR can be run -to put the correct set commands into the \fB/kernel/drv/md.conf\fR file. -.sp -.in +2 -.nf -md: d\fInum\fR: \fIdev\fR(\fInum\fR,\fInum\fR) needs maintenance -md: d\fInum\fR: \fIdev\fR needs maintenance -.fi -.in -2 - -.sp -.LP -An I/O or open error has occurred on a device within a mirror causing a -component in the mirror to change to the Maintenance state. -.sp -.LP -\fBmd: d\fR\fInum\fR\fB: \fR\fIdev\fR\fB(\fR\fInum\fR\fB,\fR\fInum\fR\fB) last -erred\fR \fBmd: d\fR\fInum\fR\fB: \fR\fIdev\fR\fB last erred\fR -.sp -.LP -An I/O or open error has occurred on a device within a mirror and the data is -not replicated elsewhere in the mirror. This is causing the component in the -mirror to change to the Last Erred state. -.SS "Warning Log Messages" -.sp -.LP -The warning log messages include: -.sp -.in +2 -.nf -md: State database is stale -.fi -.in -2 - -.sp -.LP -This error message comes when there are not enough usable replicas for the -state database to be able to update records in the database. All accesses to -the metadevice driver will fail. To fix this problem, more replicas need to be -added or inaccessible replicas need to be deleted. -.sp -.in +2 -.nf -md: d\fInum\fR: read error on \fIdev\fRmd: d\fInum\fR: write error on \fIdev\fR -.fi -.in -2 - -.sp -.LP -A read or write error has occurred on the specified submirror, at the specified -device name. This happens if any read or write errors occur on a submirror. -.sp -.in +2 -.nf -md: d\fInum\fR: read error on dev(\fInum\fR,\fInum\fR)md: d\fInum\fR: write error on dev( -\fInum\fR,\fInum\fR) -.fi -.in -2 - -.sp -.LP -A read or write error has occurred on the specified submirror, at the specified -device number. This happens if any read or write errors occur on a submirror. -.sp -.in +2 -.nf -md: State database commit failed -md: State database delete failed -.fi -.in -2 - -.sp -.LP -These messages occur when there have been device errors on components where the -state database replicas reside. These errors only occur when more than half of -the replicas have had device errors returned to them. For instance, if you have -three components with state database replicas and two of the components report -errors, then these errors may occur. The state database commit or delete is -retried periodically. If a replica is added, then the commit or delete will -finish and the system will be operational. Otherwise the system will timeout -and panic. -.sp -.in +2 -.nf -md: d\fInum\fR: Cannot load \fIdev\fR driver -.fi -.in -2 -.sp - -.sp -.LP -Underlying named driver module is not loadable (for example, \fBsd\fR, -\fBid\fR, \fBxy\fR, or a third-party driver). This could indicate that the -driver module has been removed. -.sp -.in +2 -.nf -md: Open error of hotspare \fIdev\fRmd: Open error of hotspare dev(\fInum\fR,\fInum\fR) -.fi -.in -2 - -.sp -.LP -Named hotspare is not openable, or underlying driver is not loadable. -.SS "Panic Log Messages" -.sp -.LP -The panic log messages include: -.sp -.in +2 -.nf -md: d\fInum\fR: Unknown close typemd: d\fInum\fR: Unknown open type -.fi -.in -2 - -.sp -.LP -Metadevice is being opened/closed with an unknown open type (OTYP). -.sp -.in +2 -.nf -md: State database problem -.fi -.in -2 - -.sp -.LP -Failed metadevice state database commit or delete has been retried the default -100 times. -.SH FILES -.sp -.ne 2 -.na -\fB\fB/dev/md/dsk/\fR\fBd\fIn\fR\fR \fR -.ad -.RS 30n -block device (where \fIn\fR is the device number) -.RE - -.sp -.ne 2 -.na -\fB\fB/dev/md/rdsk/\fR\fBd\fIn\fR\fR \fR -.ad -.RS 30n -raw device (where \fIn\fR is the device number) -.RE - -.sp -.ne 2 -.na -\fB\fB/dev/md/\fIsetname\fR/dsk/\fR\fBd\fIn\fR\fR \fR -.ad -.RS 30n -block device (where \fIsetname\fR is the name of the diskset and \fIn\fR is the -device number) -.RE - -.sp -.ne 2 -.na -\fB\fB/dev/md/\fIsetname\fR/rdsk/\fR\fBd\fIn\fR\fR \fR -.ad -.RS 30n -raw device (where \fIsetname\fR is the name of the diskset and \fIn\fR is the -device number) -.RE - -.sp -.ne 2 -.na -\fB\fB/dev/md/admin\fR \fR -.ad -.RS 30n -administrative device -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/drv/md\fR \fR -.ad -.RS 30n -driver module -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/drv/md.conf\fR \fR -.ad -.RS 30n -driver configuration file -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/misc/md_stripe\fR \fR -.ad -.RS 30n -stripe driver misc module -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/misc/md_mirror\fR \fR -.ad -.RS 30n -mirror driver misc module -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/misc/md_hotspares\fR \fR -.ad -.RS 30n -hotspares driver misc module -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/misc/md_trans\fR \fR -.ad -.RS 30n -metatrans driver for UFS logging -.RE - -.sp -.ne 2 -.na -\fB\fB/kernel/misc/md_raid\fR \fR -.ad -.RS 30n -RAID5 driver misc module -.RE - -.SH SEE ALSO -.sp -.LP -\fBmdmonitord\fR(1M), \fBmetaclear\fR(1M), \fBmetadb\fR(1M), -\fBmetadetach\fR(1M), \fBmetahs\fR(1M), \fBmetainit\fR(1M), -\fBmetaoffline\fR(1M), \fBmetaonline\fR(1M), \fBmetaparam\fR(1M), -\fBmetarecover\fR(1M), \fBmetarename\fR(1M), \fBmetareplace\fR(1M), -\fBmetaroot\fR(1M), \fBmetassist\fR(1M), \fBmetaset\fR(1M), \fBmetastat\fR(1M), -\fBmetasync\fR(1M), \fBmetattach\fR(1M), \fBmd.cf\fR(4), \fBmd.tab\fR(4), -\fBattributes\fR(5), -.sp -.LP -\fISolaris Volume Manager Administration Guide\fR -.SH NOTES -.sp -.LP -Trans metadevices have been replaced by UFS logging. Existing trans devices are -\fBnot\fR logging--they pass data directly through to the underlying device. -See \fBmount_ufs\fR(1M) for more information about UFS logging. diff --git a/usr/src/man/man7d/mediator.7d b/usr/src/man/man7d/mediator.7d deleted file mode 100644 index 5d2dcbbfd6f2..000000000000 --- a/usr/src/man/man7d/mediator.7d +++ /dev/null @@ -1,253 +0,0 @@ -'\" te -.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. -.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH MEDIATOR 7D "Jun 20, 2008" -.SH NAME -mediator \- support for HA configurations consisting of two strings of drives -.SH DESCRIPTION -.sp -.LP -Beginning with a prior version, Solaris Volume Manager provided support for -high-availability (HA) configurations consisting of two hosts that share at -least three strings of drives and that run software enabling exclusive access -to the data on those drives from one host. (Note: Volume Manager, by itself, -does not actually provide a high-availability environment. The diskset feature -is an enabler for HA configurations.) -.sp -.LP -Volume Manager provides support for a low-end HA solution consisting of two -hosts that share only two strings of drives. The hosts in this type of -configuration, referred to as \fImediators\fR, run a special daemon, -\fBrpc.metamedd\fR(1M). The mediator hosts take on additional responsibilities -to ensure that data is available in the case of host or drive failures. -.sp -.LP -In a mediator configuration, two hosts are physically connected to two strings -of drives. This configuration can survive the failure of a single host or a -single string of drives, without administrative intervention. If both a host -and a string of drives fail (multiple failures), the integrity of the data -cannot be guaranteed. At this point, administrative intervention is required to -make the data accessible. -.sp -.LP -The following definitions pertain to a mediator configuration: -.sp -.ne 2 -.na -\fB\fBdiskset\fR\fR -.ad -.sp .6 -.RS 4n -A set of drives containing metadevices and hot spares that can be shared -exclusively (but not concurrently) by two hosts. -.RE - -.sp -.ne 2 -.na -\fB\fBVolume Manager\fR \fBstate\fR \fBdatabase\fR\fR -.ad -.sp .6 -.RS 4n -A replicated database that stores metadevice configuration and state -information. -.RE - -.sp -.ne 2 -.na -\fB\fBmediator\fR \fBhost\fR\fR -.ad -.sp .6 -.RS 4n -A host that runs the \fBrpc.metamedd(1M)\fR daemon and that has been added to a -diskset. The mediator host participates in checking the state database and the -mediator quorum. -.RE - -.sp -.ne 2 -.na -\fB\fBmediator\fR \fBquorum\fR\fR -.ad -.sp .6 -.RS 4n -The condition achieved when the number of accessible mediator hosts is equal to -half+1 the total number of configured mediator hosts. Because it is expected -that there will be two mediator hosts, this number will normally be 2 ([(2/2) + -1] = 2.) -.RE - -.sp -.ne 2 -.na -\fB\fBreplica\fR\fR -.ad -.sp .6 -.RS 4n -A single copy of the Volume Manager metadevice state database. -.RE - -.sp -.ne 2 -.na -\fB\fBreplica\fR \fBquorum\fR\fR -.ad -.sp .6 -.RS 4n -The condition achieved when the number of accessible replicas is equal to -half+1 the total number of configured replicas. For example, if a system is -configured with ten replicas, the quorum is met when six are accessible -([(10/2) + 1 = 6]). -.RE - -.sp -.LP -A mediator host running the \fBrpc.metamedd(1M)\fR daemon keeps track of -replica updates. As long as the following conditions are met, access to data -occurs without any administrative intervention: -.RS +4 -.TP -.ie t \(bu -.el o -The replica quorum is not met. -.RE -.RS +4 -.TP -.ie t \(bu -.el o -Half of the replicas are still accessible. -.RE -.RS +4 -.TP -.ie t \(bu -.el o -The mediator quorum is met. -.RE -.sp -.LP -The following conditions describe the operation of mediator hosts: -.RS +4 -.TP -1. -If the is met, access to the diskset is granted. At this point no mediator -host is involved. -.RE -.RS +4 -.TP -2. -If the replica quorum is not met, half of the replicas are accessible, the -mediator quorum is met, and the replica and mediator data match, access to the -diskset is granted. The mediator host contributes the deciding vote. -.RE -.RS +4 -.TP -3. -If the replica quorum is not met, half of the replicas are accessible, the -mediator quorum is not met, half of the mediator hosts is accessible, and the -replica and mediator data match, the system prompts you to grant or deny access -to the diskset. -.RE -.RS +4 -.TP -4. -If the replica quorum is not met, half of the replicas are accessible, the -mediator quorum is met, and the replica and mediator data do not match, access -to the diskset is read-only. You can delete replicas, release the diskset, and -retake the diskset to gain read-write access to the data in the diskset. -.RE -.RS +4 -.TP -5. -In all other cases, the diskset access is read-only. You can delete -replicas, release the diskset, and retake the diskset to gain read-write access -to the data in the diskset. -.RE -.sp -.LP -The \fBmetaset\fR(1M) command administers disksets and mediator hosts. The -following options to the \fBmetaset\fR command pertain only to administering -mediator hosts. -.sp -.ne 2 -.na -\fB\fB-a\fR \fB-m\fR \fB\fImediator_host_list\fR\fR\fR -.ad -.RS 28n -Adds mediator hosts to the named set. A \fImediator_host_list\fR is the -nodename of the mediator host to be added and up to 2 other aliases for the -mediator host. The nodename and aliases for each mediator host are separated by -commas. Up to 3 mediator hosts can be specified for the named diskset. -.RE - -.sp -.ne 2 -.na -\fB\fB-d\fR \fB-m\fR \fB\fImediator_host_list\fR\fR\fR -.ad -.RS 28n -Deletes mediator hosts from the named diskset. Mediator hosts are deleted from -the diskset by specifying the nodename of mediator host to delete. -.RE - -.sp -.ne 2 -.na -\fB\fB-q\fR\fR -.ad -.RS 28n -Displays an enumerated list of tags pertaining to ``tagged data'' that may be -encountered during a take of the ownership of a diskset. -.RE - -.sp -.ne 2 -.na -\fB\fB-t\fR [\fB-f\fR] \fB-y\fR\fR -.ad -.RS 28n -Takes ownership of a diskset safely, unless \fB-f\fR is used, in which case the -take is unconditional. If \fBmetaset\fR finds that another host owns the set, -this host will not be allowed to take ownership of the set. If the set is not -owned by any other host, all the disks within the set will be owned by the -host on which \fBmetaset\fR was executed. The metadevice state database is -read in and the shared metadevices contained in the set become accessible. The -\fB-t\fR option will take a diskset that has stale databases. When the -databases are stale, \fBmetaset\fR will exit with code 66, and a message will -be printed. At that point, the only operations permitted are the addition and -deletion of replicas. Once the addition or deletion of the replicas has been -completed, the diskset should be released and retaken to gain full access to -the data. If mediator hosts have been configured, some additional exit codes -are possible. If half of the replicas and half of the mediator hosts are -operating properly, the take will exit with code 3. At this point, you can add -or delete replicas, or use the \fB-y\fR option on a subsequent take. If the -take operation encounters ``tagged data,'' the take operation will exit with -code 2. You can then run the \fBmetaset\fR command with the \fB-q\fR option to -see an enumerated list of tags. -.RE - -.sp -.ne 2 -.na -\fB\fB-t\fR [\fB-f\fR] \fB-u\fR \fItagnumber\fR\fR -.ad -.RS 28n -Once a tag has been selected, a subsequent take with \fB-u\fR -\fB\fItagnumber\fR\fR can be executed to select the data associated with the -given \fItagnumber\fR. -.RE - -.SH SEE ALSO -.sp -.LP -\fBmetaset\fR(1M), \fBmd\fR(7D), \fBrpc.metamedd\fR(1M), \fBrpc.metad\fR(1M) -.sp -.LP -Sun Cluster documentation, \fISolaris Volume Manager Administration Guide\fR -.SH NOTES -.sp -.LP -Diskset administration, including the addition and deletion of hosts and -drives, requires all hosts in the set to be accessible from the network. diff --git a/usr/src/man/man7i/dkio.7i b/usr/src/man/man7i/dkio.7i index 455954c160ea..7a23ff61c836 100644 --- a/usr/src/man/man7i/dkio.7i +++ b/usr/src/man/man7i/dkio.7i @@ -1,9 +1,24 @@ -'\" te +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH DKIO 7I "Aug 3, 2009" +.\" Copyright 2016 Nexenta Systems, Inc. +.\" +.TH DKIO 7I "Oct 8, 2016" .SH NAME dkio \- disk control operations .SH SYNOPSIS @@ -19,13 +34,11 @@ dkio \- disk control operations .fi .SH DESCRIPTION -.sp .LP Disk drivers support a set of \fBioctl\fR(2) requests for disk controller, geometry, and partition information. Basic to these \fBioctl()\fR requests are the definitions in \fB\fR\&. .SH IOCTLS -.sp .LP The following \fBioctl()\fR requests set and/or retrieve the current disk controller, partitions, or geometry information on all architectures: @@ -84,8 +97,6 @@ struct dk_cinfo { #define DKC_SMSFLOPPY 12 #define DKC_SCSI_CCS 13 /* SCSI CCS compatible */ #define DKC_INTEL82072 14 /* native floppy chip */ - #define DKC_MD 16 /* meta-disk (virtual-disk) */ - /* driver */ #define DKC_INTEL82077 19 /* 82077 floppy disk */ /* controller */ #define DKC_DIRECT 20 /* Intel direct attached */ @@ -754,12 +765,10 @@ if (retval != EINVAL || retval != ENOTSUP) { .RE .SS "RETURN VALUES" -.sp .LP Upon successful completion, the value returned is \fB0\fR. Otherwise, \fB-1\fR is returned and \fBerrno\fR is set to indicate the error. .SS "x86 Only" -.sp .LP The following \fBioctl()\fR requests set and/or retrieve the current disk controller, partitions, or geometry information on the x86 architecture. @@ -903,12 +912,10 @@ recreation of all logical device nodes. .RE .SH SEE ALSO -.sp .LP \fBaddbadsec\fR(1M), \fBfdisk\fR(1M), \fBformat\fR(1M), \fBioctl\fR(2), \fBcdio\fR(7I), \fBcmdk\fR(7D), \fBfdio\fR(7I), \fBhdio\fR(7I), \fBsd\fR(7D) .SH NOTES -.sp .LP Blocksize information provided in \fBDKIOCGMEDIAINFO\fR is the size (in bytes) of the device's basic unit of operation and can differ from the blocksize that diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 948b90ef1e05..7833f7ef96e5 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -940,7 +940,6 @@ file path=usr/lib/devfsadm/linkmod/SUNW_disk_link.so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_fssnap_link.so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_ieee1394_link.so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_lofi_link.so group=sys -file path=usr/lib/devfsadm/linkmod/SUNW_md_link.so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_misc_link.so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_misc_link_$(ARCH).so group=sys file path=usr/lib/devfsadm/linkmod/SUNW_port_link.so group=sys diff --git a/usr/src/pkg/manifests/SUNWmd.mf b/usr/src/pkg/manifests/SUNWmd.mf index 10287616a74a..c896308592ca 100644 --- a/usr/src/pkg/manifests/SUNWmd.mf +++ b/usr/src/pkg/manifests/SUNWmd.mf @@ -23,7 +23,8 @@ # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # +# Was renamed to storage/svm, both now obsolete. + set name=pkg.fmri value=pkg:/SUNWmd@0.5.11,5.11-0.133 -set name=pkg.renamed value=true +set name=pkg.obsolete value=true set name=variant.arch value=$(ARCH) -depend fmri=pkg:/storage/svm@0.5.11,5.11-0.133 type=require diff --git a/usr/src/pkg/manifests/SUNWmda.mf b/usr/src/pkg/manifests/SUNWmda.mf index 625c51e35a0e..1c0abcfe10f4 100644 --- a/usr/src/pkg/manifests/SUNWmda.mf +++ b/usr/src/pkg/manifests/SUNWmda.mf @@ -23,7 +23,8 @@ # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # +# Was renamed to storage/metassist, both now obsolete. + set name=pkg.fmri value=pkg:/SUNWmda@0.5.11,5.11-0.133 -set name=pkg.renamed value=true +set name=pkg.obsolete value=true set name=variant.arch value=$(ARCH) -depend fmri=pkg:/storage/metassist@0.5.11,5.11-0.133 type=require diff --git a/usr/src/pkg/manifests/SUNWmddr.mf b/usr/src/pkg/manifests/SUNWmddr.mf index a3295da4e3f5..c0259a835308 100644 --- a/usr/src/pkg/manifests/SUNWmddr.mf +++ b/usr/src/pkg/manifests/SUNWmddr.mf @@ -23,7 +23,10 @@ # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # +# +# Was renamed to system/library/svm-rcm, both now obsolete. +# + set name=pkg.fmri value=pkg:/SUNWmddr@0.5.11,5.11-0.133 -set name=pkg.renamed value=true +set name=pkg.obsolete value=true set name=variant.arch value=$(ARCH) -depend fmri=pkg:/system/library/svm-rcm@0.5.11,5.11-0.133 type=require diff --git a/usr/src/pkg/manifests/consolidation-osnet-osnet-message-files.mf b/usr/src/pkg/manifests/consolidation-osnet-osnet-message-files.mf index 516885c8bfc4..ed49e49d5049 100644 --- a/usr/src/pkg/manifests/consolidation-osnet-osnet-message-files.mf +++ b/usr/src/pkg/manifests/consolidation-osnet-osnet-message-files.mf @@ -319,7 +319,6 @@ file path=usr/lib/locale/C/LC_MESSAGES/more.help group=sys file path=usr/lib/locale/C/LC_MESSAGES/priv_names group=sys file path=usr/lib/locale/C/LC_MESSAGES/uxlibc.src group=sys file path=usr/lib/locale/C/LC_TIME/SUNW_OST_OSCMD.po group=sys -file path=usr/lib/locale/C/LC_TIME/SUNW_OST_OSLIB.po group=sys file path=usr/share/lib/locale/com/sun/admin/pm/client/pmHelpResources.java \ group=lp file path=usr/share/lib/locale/com/sun/admin/pm/client/pmResources.java \ diff --git a/usr/src/pkg/manifests/developer-debug-mdb.mf b/usr/src/pkg/manifests/developer-debug-mdb.mf index 2cd8a0299e6e..34be68fa230d 100644 --- a/usr/src/pkg/manifests/developer-debug-mdb.mf +++ b/usr/src/pkg/manifests/developer-debug-mdb.mf @@ -107,7 +107,6 @@ file path=kernel/kmdb/$(ARCH64)/krtld group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/lofs group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/logindmux group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/mac group=sys mode=0555 -file path=kernel/kmdb/$(ARCH64)/md group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/mdb_ds group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/mm group=sys mode=0555 file path=kernel/kmdb/$(ARCH64)/mpt group=sys mode=0555 @@ -145,7 +144,6 @@ $(i386_ONLY)file path=kernel/kmdb/krtld group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/lofs group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/logindmux group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/mac group=sys mode=0555 -$(i386_ONLY)file path=kernel/kmdb/md group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/mdb_ds group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/mm group=sys mode=0555 $(i386_ONLY)file path=kernel/kmdb/mpt group=sys mode=0555 @@ -227,7 +225,6 @@ file path=usr/lib/mdb/kvm/$(ARCH64)/krtld.so group=sys mode=0555 file path=usr/lib/mdb/kvm/$(ARCH64)/lofs.so group=sys mode=0555 file path=usr/lib/mdb/kvm/$(ARCH64)/logindmux.so group=sys mode=0555 file path=usr/lib/mdb/kvm/$(ARCH64)/mac.so group=sys mode=0555 -file path=usr/lib/mdb/kvm/$(ARCH64)/md.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/$(ARCH64)/mdb_kb.so group=sys mode=0555 file path=usr/lib/mdb/kvm/$(ARCH64)/mdb_ks.so group=sys mode=0555 file path=usr/lib/mdb/kvm/$(ARCH64)/mm.so group=sys mode=0555 @@ -267,7 +264,6 @@ $(i386_ONLY)file path=usr/lib/mdb/kvm/krtld.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/lofs.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/logindmux.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/mac.so group=sys mode=0555 -$(i386_ONLY)file path=usr/lib/mdb/kvm/md.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/mdb_kb.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/mdb_ks.so group=sys mode=0555 $(i386_ONLY)file path=usr/lib/mdb/kvm/mm.so group=sys mode=0555 diff --git a/usr/src/pkg/manifests/storage-metassist.mf b/usr/src/pkg/manifests/storage-metassist.mf index 82cafa104cba..6590066a701f 100644 --- a/usr/src/pkg/manifests/storage-metassist.mf +++ b/usr/src/pkg/manifests/storage-metassist.mf @@ -21,45 +21,10 @@ # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2012 Nexenta Systems, Inc. All rights reserved. +# Copyright 2016 Nexenta Systems, Inc. # -# -# The default for payload-bearing actions in this package is to appear in the -# global zone only. See the include file for greater detail, as well as -# information about overriding the defaults. -# - set name=pkg.fmri value=pkg:/storage/metassist@$(PKGVERS) -set name=pkg.description value="Solaris Volume Manager Assistant" -set name=pkg.summary value="Solaris Volume Manager Assistant" -set name=info.classification value=org.opensolaris.category.2008:System/Core +set name=pkg.obsolete value=true +set name=org.opensolaris.noincorp value=true set name=variant.arch value=$(ARCH) -dir path=etc group=sys -dir path=etc/default group=sys -dir path=usr group=sys -dir path=usr/sbin -dir path=usr/share -dir path=usr/share/lib -dir path=usr/share/lib/xml group=sys -dir path=usr/share/lib/xml/dtd group=sys -dir path=usr/share/lib/xml/style group=sys -dir path=usr/share/man -dir path=usr/share/man/man1m -dir path=usr/share/man/man4 -file path=etc/default/metassist.xml group=sys -file path=usr/sbin/metassist mode=0555 -file path=usr/share/lib/xml/dtd/volume-config.dtd group=sys -file path=usr/share/lib/xml/dtd/volume-defaults.dtd group=sys -file path=usr/share/lib/xml/dtd/volume-request.dtd group=sys -file path=usr/share/lib/xml/style/volume-command.xsl group=sys -file path=usr/share/man/man1m/metassist.1m -file path=usr/share/man/man4/volume-config.4 -file path=usr/share/man/man4/volume-request.4 -legacy pkg=SUNWmdar desc="Solaris Volume Manager Assistant (Root)" \ - name="Solaris Volume Manager Assistant (Root)" -legacy pkg=SUNWmdau desc="Solaris Volume Manager Assistant (Usr)" \ - name="Solaris Volume Manager Assistant (Usr)" -license cr_Sun license=cr_Sun -license lic_CDDL license=lic_CDDL -link path=usr/share/man/man4/volume-defaults.4 target=volume-request.4 diff --git a/usr/src/pkg/manifests/storage-svm.mf b/usr/src/pkg/manifests/storage-svm.mf index 6bd2d477f9b7..aec36dfd869c 100644 --- a/usr/src/pkg/manifests/storage-svm.mf +++ b/usr/src/pkg/manifests/storage-svm.mf @@ -24,163 +24,7 @@ # Copyright 2016 Nexenta Systems, Inc. # -# -# The default for payload-bearing actions in this package is to appear in the -# global zone only. See the include file for greater detail, as well as -# information about overriding the defaults. -# - set name=pkg.fmri value=pkg:/storage/svm@$(PKGVERS) -set name=pkg.description value="Solaris Volume Manager commands" -set name=pkg.summary value="Solaris Volume Manager" -set name=info.classification value=org.opensolaris.category.2008:System/Core +set name=pkg.obsolete value=true +set name=org.opensolaris.noincorp value=true set name=variant.arch value=$(ARCH) -dir path=etc group=sys -dir path=etc/flash group=sys -dir path=etc/flash/postdeployment group=sys -dir path=etc/flash/predeployment group=sys -dir path=etc/lvm group=sys -dir path=kernel group=sys -dir path=kernel/drv group=sys -dir path=kernel/drv/$(ARCH64) group=sys -dir path=kernel/misc group=sys -dir path=kernel/misc/$(ARCH64) group=sys -dir path=lib -dir path=lib/svc -dir path=lib/svc/manifest group=sys -dir path=lib/svc/manifest/network group=sys -dir path=lib/svc/manifest/network/rpc group=sys -dir path=lib/svc/manifest/system group=sys -dir path=lib/svc/method -dir path=sbin group=sys -dir path=usr group=sys -dir path=usr/lib -dir path=usr/lib/drv -dir path=usr/lib/lvm -dir path=usr/sbin -dir path=usr/share/man -dir path=usr/share/man/man1m -dir path=usr/share/man/man4 -dir path=usr/share/man/man7d -dir path=usr/snadm -dir path=usr/snadm/lib -driver name=md policy="admin write_priv_set=sys_config" \ - perms="* 0640 root sys" perms="admin 0644 root sys" -file path=etc/flash/postdeployment/svm.cleanup group=sys mode=0744 -file path=etc/flash/predeployment/svm.save group=sys mode=0744 -file path=etc/lvm/devpath group=sys original_name=SUNWmd:etc/lvm/devpath \ - preserve=true -file path=etc/lvm/lock group=sys original_name=SUNWmd:etc/lvm/lock \ - preserve=true -file path=etc/lvm/md.cf group=sys original_name=SUNWmd:etc/lvm/md.cf \ - preserve=true -file path=etc/lvm/md.ctlrmap group=sys original_name=SUNWmd:etc/lvm/md.ctlrmap \ - preserve=true -file path=etc/lvm/md.tab group=sys original_name=SUNWmd:etc/lvm/md.tab \ - preserve=true -file path=etc/lvm/mddb.cf group=sys original_name=SUNWmd:etc/lvm/mddb.cf \ - preserve=true -file path=etc/lvm/runtime.cf group=sys original_name=SUNWmd:etc/lvm/runtime.cf \ - preserve=true -file path=kernel/drv/$(ARCH64)/md group=sys -$(i386_ONLY)file path=kernel/drv/md group=sys -file path=kernel/drv/md.conf group=sys original_name=SUNWmd:kernel/drv/md.conf \ - preserve=true -file path=kernel/misc/$(ARCH64)/md_hotspares group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_mirror group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_notify group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_raid group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_sp group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_stripe group=sys mode=0755 -file path=kernel/misc/$(ARCH64)/md_trans group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_hotspares group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_mirror group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_notify group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_raid group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_sp group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_stripe group=sys mode=0755 -$(i386_ONLY)file path=kernel/misc/md_trans group=sys mode=0755 -file path=lib/libmeta.so.1 -file path=lib/svc/manifest/network/rpc/mdcomm.xml group=sys mode=0444 -file path=lib/svc/manifest/network/rpc/meta.xml group=sys mode=0444 -file path=lib/svc/manifest/network/rpc/metamed.xml group=sys mode=0444 -file path=lib/svc/manifest/network/rpc/metamh.xml group=sys mode=0444 -file path=lib/svc/manifest/system/mdmonitor.xml group=sys mode=0444 -file path=lib/svc/manifest/system/metainit.xml group=sys mode=0444 -file path=lib/svc/manifest/system/metasync.xml group=sys mode=0444 -file path=lib/svc/method/svc-mdmonitor mode=0555 -file path=lib/svc/method/svc-metainit mode=0555 -file path=lib/svc/method/svc-metasync mode=0555 -file path=sbin/metadb mode=0555 -file path=sbin/metadevadm mode=0555 -file path=sbin/metainit mode=0555 -file path=sbin/metarecover mode=0555 -file path=sbin/metastat mode=0555 -file path=usr/lib/drv/preen_md.so.1 mode=0555 -file path=usr/lib/lvm/mddoors mode=0555 -file path=usr/lib/lvm/metaclust mode=0555 -file path=usr/sbin/mdmonitord mode=0555 -file path=usr/sbin/medstat mode=0555 -file path=usr/sbin/metaclear mode=0555 -file path=usr/sbin/metadetach mode=0555 -file path=usr/sbin/metahs mode=0555 -file path=usr/sbin/metaimport mode=0555 -file path=usr/sbin/metaoffline mode=0555 -file path=usr/sbin/metaonline mode=0555 -file path=usr/sbin/metaparam mode=0555 -file path=usr/sbin/metarename mode=0555 -file path=usr/sbin/metareplace mode=0555 -file path=usr/sbin/metaroot mode=0555 -file path=usr/sbin/metaset mode=0555 -file path=usr/sbin/metasync mode=0555 -file path=usr/sbin/metattach mode=0555 -file path=usr/sbin/rpc.mdcommd mode=0555 -file path=usr/sbin/rpc.metad mode=0555 -file path=usr/sbin/rpc.metamedd mode=0555 -file path=usr/sbin/rpc.metamhd mode=0555 -file path=usr/share/man/man1m/mdmonitord.1m -file path=usr/share/man/man1m/medstat.1m -file path=usr/share/man/man1m/metaclear.1m -file path=usr/share/man/man1m/metadb.1m -file path=usr/share/man/man1m/metadevadm.1m -file path=usr/share/man/man1m/metahs.1m -file path=usr/share/man/man1m/metaimport.1m -file path=usr/share/man/man1m/metainit.1m -file path=usr/share/man/man1m/metaoffline.1m -file path=usr/share/man/man1m/metaparam.1m -file path=usr/share/man/man1m/metarecover.1m -file path=usr/share/man/man1m/metarename.1m -file path=usr/share/man/man1m/metareplace.1m -file path=usr/share/man/man1m/metaroot.1m -file path=usr/share/man/man1m/metaset.1m -file path=usr/share/man/man1m/metastat.1m -file path=usr/share/man/man1m/metasync.1m -file path=usr/share/man/man1m/metattach.1m -file path=usr/share/man/man1m/rpc.mdcommd.1m -file path=usr/share/man/man1m/rpc.metad.1m -file path=usr/share/man/man1m/rpc.metamedd.1m -file path=usr/share/man/man1m/rpc.metamhd.1m -file path=usr/share/man/man4/md.tab.4 -file path=usr/share/man/man4/mddb.cf.4 -file path=usr/share/man/man4/meddb.4 -file path=usr/share/man/man7d/md.7d -file path=usr/share/man/man7d/mediator.7d -file path=usr/snadm/lib/libsvm.so.1 -legacy pkg=SUNWmdr desc="Solaris Volume Manager driver" \ - name="Solaris Volume Manager, (Root)" -legacy pkg=SUNWmdu desc="Solaris Volume Manager commands" \ - name="Solaris Volume Manager, (Usr)" -license cr_Sun license=cr_Sun -license lic_CDDL license=lic_CDDL -link path=lib/libmeta.so target=libmeta.so.1 -link path=usr/lib/libmeta.so target=../../lib/libmeta.so.1 -link path=usr/lib/libmeta.so.1 target=../../lib/libmeta.so.1 -link path=usr/sbin/metadb target=../../sbin/metadb -link path=usr/sbin/metadevadm target=../../sbin/metadevadm -link path=usr/sbin/metainit target=../../sbin/metainit -link path=usr/sbin/metarecover target=../../sbin/metarecover -link path=usr/sbin/metastat target=../../sbin/metastat -link path=usr/share/man/man1m/metadetach.1m target=metattach.1m -link path=usr/share/man/man1m/metaonline.1m target=metaoffline.1m -link path=usr/share/man/man4/md.cf.4 target=md.tab.4 -link path=usr/snadm/lib/libsvm.so target=./libsvm.so.1 diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 658ed3a1b543..4ccc84666fcb 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -105,7 +105,6 @@ dir path=usr/include/sys/ib/ibtl/impl dir path=usr/include/sys/ib/mgt dir path=usr/include/sys/ib/mgt/ibmf dir path=usr/include/sys/iso -dir path=usr/include/sys/lvm dir path=usr/include/sys/proc dir path=usr/include/sys/rsm $(i386_ONLY)dir path=usr/include/sys/sata group=sys @@ -506,7 +505,6 @@ file path=usr/include/librcm.h file path=usr/include/libscf.h file path=usr/include/libscf_priv.h file path=usr/include/libshare.h -file path=usr/include/libsvm.h file path=usr/include/libsysevent.h file path=usr/include/libsysevent_impl.h file path=usr/include/libtsnet.h @@ -525,19 +523,8 @@ file path=usr/include/maillock.h file path=usr/include/malloc.h file path=usr/include/md4.h file path=usr/include/md5.h -file path=usr/include/mdiox.h -file path=usr/include/mdmn_changelog.h file path=usr/include/memory.h file path=usr/include/menu.h -file path=usr/include/meta.h -file path=usr/include/meta_basic.h -file path=usr/include/meta_runtime.h -file path=usr/include/metacl.h -file path=usr/include/metad.h -file path=usr/include/metadyn.h -file path=usr/include/metamed.h -file path=usr/include/metamhd.h -file path=usr/include/mhdx.h file path=usr/include/mon.h file path=usr/include/monetary.h file path=usr/include/mp.h @@ -1194,26 +1181,6 @@ file path=usr/include/sys/lockfs.h file path=usr/include/sys/lofi.h file path=usr/include/sys/log.h file path=usr/include/sys/logindmux.h -file path=usr/include/sys/lvm/md_basic.h -file path=usr/include/sys/lvm/md_convert.h -file path=usr/include/sys/lvm/md_crc.h -file path=usr/include/sys/lvm/md_hotspares.h -file path=usr/include/sys/lvm/md_mddb.h -file path=usr/include/sys/lvm/md_mdiox.h -file path=usr/include/sys/lvm/md_mhdx.h -file path=usr/include/sys/lvm/md_mirror.h -file path=usr/include/sys/lvm/md_mirror_shared.h -file path=usr/include/sys/lvm/md_names.h -file path=usr/include/sys/lvm/md_notify.h -file path=usr/include/sys/lvm/md_raid.h -file path=usr/include/sys/lvm/md_rename.h -file path=usr/include/sys/lvm/md_sp.h -file path=usr/include/sys/lvm/md_stripe.h -file path=usr/include/sys/lvm/md_trans.h -file path=usr/include/sys/lvm/mdio.h -file path=usr/include/sys/lvm/mdmed.h -file path=usr/include/sys/lvm/mdmn_commd.h -file path=usr/include/sys/lvm/mdvar.h file path=usr/include/sys/lwp.h file path=usr/include/sys/lwp_timer_impl.h file path=usr/include/sys/lwp_upimutex_impl.h @@ -1505,7 +1472,6 @@ file path=usr/include/sys/sysevent/env.h file path=usr/include/sys/sysevent/eventdefs.h file path=usr/include/sys/sysevent/ipmp.h file path=usr/include/sys/sysevent/pwrctl.h -file path=usr/include/sys/sysevent/svm.h file path=usr/include/sys/sysevent/vrrp.h file path=usr/include/sys/sysevent_impl.h $(i386_ONLY)file path=usr/include/sys/sysi86.h diff --git a/usr/src/pkg/manifests/system-library-svm-rcm.mf b/usr/src/pkg/manifests/system-library-svm-rcm.mf index 6d7ad27573b8..cbd1169f8744 100644 --- a/usr/src/pkg/manifests/system-library-svm-rcm.mf +++ b/usr/src/pkg/manifests/system-library-svm-rcm.mf @@ -24,18 +24,6 @@ # set name=pkg.fmri value=pkg:/system/library/svm-rcm@$(PKGVERS) -set name=pkg.description \ - value="reconfiguration coordination manager module for SVM" -set name=pkg.summary value="SVM RCM Module" -set name=info.classification \ - value=org.opensolaris.category.2008:System/Libraries +set name=pkg.obsolete value=true +set name=org.opensolaris.noincorp value=true set name=variant.arch value=$(ARCH) -dir path=usr group=sys -dir path=usr/lib -dir path=usr/lib/rcm -dir path=usr/lib/rcm/modules -file path=usr/lib/rcm/modules/SUNW_svm_rcm.so mode=0555 -legacy pkg=SUNWmddr desc="reconfiguration coordination manager module for SVM" \ - name="SVM RCM Module" -license cr_Sun license=cr_Sun -license lic_CDDL license=lic_CDDL diff --git a/usr/src/uts/Makefile b/usr/src/uts/Makefile index 17e4999086fe..dbd7eeb94a6b 100644 --- a/usr/src/uts/Makefile +++ b/usr/src/uts/Makefile @@ -145,7 +145,6 @@ COMMON_HDRDIRS= common/avs \ common/inet/sockmods/netpacket \ common/io/bpf/net \ common/io/fibre-channel/fca/qlc \ - common/io/lvm/md \ common/ipp \ common/net \ common/netinet \ @@ -170,7 +169,6 @@ DYNHDRDIRS = common/avs \ common/gssapi \ common/idmap \ common/io/fibre-channel/fca/qlc \ - common/io/lvm/md \ common/klm \ common/rpc \ common/rpcsvc \ diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 4ed01c9941d0..455b2a665bf9 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1347,30 +1347,6 @@ SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \ BOOTFS_OBJS += bootfs_construct.o bootfs_vfsops.o bootfs_vnops.o -# -# LVM modules -# -MD_OBJS += md.o md_error.o md_ioctl.o md_mddb.o md_names.o \ - md_med.o md_rename.o md_subr.o - -MD_COMMON_OBJS = md_convert.o md_crc.o md_revchk.o - -MD_DERIVED_OBJS = metamed_xdr.o meta_basic_xdr.o - -SOFTPART_OBJS += sp.o sp_ioctl.o - -STRIPE_OBJS += stripe.o stripe_ioctl.o - -HOTSPARES_OBJS += hotspares.o - -RAID_OBJS += raid.o raid_ioctl.o raid_replay.o raid_resync.o raid_hotspare.o - -MIRROR_OBJS += mirror.o mirror_ioctl.o mirror_resync.o - -NOTIFY_OBJS += md_notify.o - -TRANS_OBJS += mdtrans.o trans_ioctl.o trans_log.o - ZFS_COMMON_OBJS += \ arc.o \ blkptr.o \ diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index c16892536846..d1e69d2c011e 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -22,9 +22,9 @@ # # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2016 Garrett D'Amore -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2013 Saso Kiselkov. All rights reserved. # Copyright 2016 Joyent, Inc. +# Copyright 2016 Nexenta Systems, Inc. # # @@ -924,38 +924,6 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lp/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/hotspares/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/md/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/mirror/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/notify/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/raid/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/softpart/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/stripe/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/lvm/trans/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mac/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -1530,10 +1498,6 @@ $(OBJS_DIR)/%.o: $(COMMONBASE)/list/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) -$(OBJS_DIR)/%.o: $(COMMONBASE)/lvm/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - $(OBJS_DIR)/%.o: $(COMMONBASE)/lzma/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -2303,30 +2267,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/aggr/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lp/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/hotspares/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/md/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/mirror/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/raid/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/softpart/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/stripe/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/notify/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/lvm/trans/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/mac/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) @@ -2711,9 +2651,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/ktli/%.c $(LINTS_DIR)/%.ln: $(COMMONBASE)/list/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(COMMONBASE)/lvm/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - $(LINTS_DIR)/%.ln: $(COMMONBASE)/lzma/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/fs/ufs/ufs_vfsops.c b/usr/src/uts/common/fs/ufs/ufs_vfsops.c index ca6d32c3d42c..002d821ec2c6 100644 --- a/usr/src/uts/common/fs/ufs/ufs_vfsops.c +++ b/usr/src/uts/common/fs/ufs/ufs_vfsops.c @@ -18,9 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -2100,199 +2102,3 @@ ufsinit(int fstype, char *name) ufs_iinit(); return (0); } - -#ifdef __sparc - -/* - * Mounting a mirrored SVM volume is only supported on ufs, - * this is special-case boot code to support that configuration. - * At this point, we have booted and mounted root on a - * single component of the mirror. Complete the boot - * by configuring SVM and converting the root to the - * dev_t of the mirrored root device. This dev_t conversion - * only works because the underlying device doesn't change. - */ -int -ufs_remountroot(struct vfs *vfsp) -{ - struct ufsvfs *ufsvfsp; - struct ulockfs *ulp; - dev_t new_rootdev; - dev_t old_rootdev; - struct vnode *old_rootvp; - struct vnode *new_rootvp; - int error, sberror = 0; - struct inode *ip; - union ihead *ih; - struct buf *bp; - int i; - - old_rootdev = rootdev; - old_rootvp = rootvp; - - new_rootdev = getrootdev(); - if (new_rootdev == (dev_t)NODEV) { - return (ENODEV); - } - - new_rootvp = makespecvp(new_rootdev, VBLK); - - error = VOP_OPEN(&new_rootvp, - (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE, CRED(), NULL); - if (error) { - cmn_err(CE_CONT, - "Cannot open mirrored root device, error %d\n", error); - return (error); - } - - if (vfs_lock(vfsp) != 0) { - return (EBUSY); - } - - ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; - ulp = &ufsvfsp->vfs_ulockfs; - - mutex_enter(&ulp->ul_lock); - atomic_inc_ulong(&ufs_quiesce_pend); - - (void) ufs_quiesce(ulp); - (void) ufs_flush(vfsp); - - /* - * Convert root vfs to new dev_t, including vfs hash - * table and fs id. - */ - vfs_root_redev(vfsp, new_rootdev, ufsfstype); - - ufsvfsp->vfs_devvp = new_rootvp; - ufsvfsp->vfs_dev = new_rootdev; - - bp = ufsvfsp->vfs_bufp; - bp->b_edev = new_rootdev; - bp->b_dev = cmpdev(new_rootdev); - - /* - * The buffer for the root inode does not contain a valid b_vp - */ - (void) bfinval(new_rootdev, 0); - - /* - * Here we hand-craft inodes with old root device - * references to refer to the new device instead. - */ - mutex_enter(&ufs_scan_lock); - - for (i = 0, ih = ihead; i < inohsz; i++, ih++) { - mutex_enter(&ih_lock[i]); - for (ip = ih->ih_chain[0]; - ip != (struct inode *)ih; - ip = ip->i_forw) { - if (ip->i_ufsvfs != ufsvfsp) - continue; - if (ip == ufsvfsp->vfs_qinod) - continue; - if (ip->i_dev == old_rootdev) { - ip->i_dev = new_rootdev; - } - - if (ip->i_devvp == old_rootvp) { - ip->i_devvp = new_rootvp; - } - } - mutex_exit(&ih_lock[i]); - } - - mutex_exit(&ufs_scan_lock); - - /* - * Make Sure logging structures are using the new device - * if logging is enabled. Also start any logging thread that - * needs to write to the device and couldn't earlier. - */ - if (ufsvfsp->vfs_log) { - buf_t *bp, *tbp; - ml_unit_t *ul = ufsvfsp->vfs_log; - struct fs *fsp = ufsvfsp->vfs_fs; - - /* - * Update the main logging structure. - */ - ul->un_dev = new_rootdev; - - /* - * Get a new bp for the on disk structures. - */ - bp = ul->un_bp; - tbp = ngeteblk(dbtob(LS_SECTORS)); - tbp->b_edev = new_rootdev; - tbp->b_dev = cmpdev(new_rootdev); - tbp->b_blkno = bp->b_blkno; - bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE); - bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE); - bp->b_flags |= (B_STALE | B_AGE); - brelse(bp); - ul->un_bp = tbp; - - /* - * Allocate new circular buffers. - */ - alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE); - alloc_wrbuf(&ul->un_wrbuf, ldl_bufsize(ul)); - - /* - * Clear the noroll bit which indicates that logging - * can't roll the log yet and start the logmap roll thread - * unless the filesystem is still read-only in which case - * remountfs() will do it when going to read-write. - */ - ASSERT(ul->un_flags & LDL_NOROLL); - - if (!fsp->fs_ronly) { - ul->un_flags &= ~LDL_NOROLL; - logmap_start_roll(ul); - } - - /* - * Start the reclaim thread if needed. - */ - if (!fsp->fs_ronly && (fsp->fs_reclaim & - (FS_RECLAIM|FS_RECLAIMING))) { - fsp->fs_reclaim &= ~FS_RECLAIM; - fsp->fs_reclaim |= FS_RECLAIMING; - ufs_thread_start(&ufsvfsp->vfs_reclaim, - ufs_thread_reclaim, vfsp); - TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE); - if (sberror = geterror(ufsvfsp->vfs_bufp)) { - refstr_t *mntpt; - mntpt = vfs_getmntpoint(vfsp); - cmn_err(CE_WARN, - "Remountroot failed to update Reclaim" - "state for filesystem %s " - "Error writing SuperBlock %d", - refstr_value(mntpt), error); - refstr_rele(mntpt); - } - } - } - - rootdev = new_rootdev; - rootvp = new_rootvp; - - atomic_dec_ulong(&ufs_quiesce_pend); - cv_broadcast(&ulp->ul_cv); - mutex_exit(&ulp->ul_lock); - - vfs_unlock(vfsp); - - error = VOP_CLOSE(old_rootvp, FREAD, 1, (offset_t)0, CRED(), NULL); - if (error) { - cmn_err(CE_CONT, - "close of root device component failed, error %d\n", - error); - } - VN_RELE(old_rootvp); - - return (sberror ? sberror : error); -} - -#endif /* __sparc */ diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index c50fe824add3..18a5cf26d07a 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -18,12 +18,13 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Toomas Soome * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -914,32 +915,6 @@ vfs_mountroot(void) vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab"); } -#ifdef __sparc - /* - * This bit of magic can go away when we convert sparc to - * the new boot architecture based on ramdisk. - * - * Booting off a mirrored root volume: - * At this point, we have booted and mounted root on a - * single component of the mirror. Complete the boot - * by configuring SVM and converting the root to the - * dev_t of the mirrored root device. This dev_t conversion - * only works because the underlying device doesn't change. - */ - if (root_is_svm) { - if (svm_rootconf()) { - panic("vfs_mountroot: cannot remount root"); - } - - /* - * mnttab should reflect the new root device - */ - vfs_lock_wait(rootvfs); - vfs_setresource(rootvfs, rootfs.bo_name, 0); - vfs_unlock(rootvfs); - } -#endif /* __sparc */ - if (strcmp(rootfs.bo_fstype, "zfs") != 0) { /* * Look up the root device via devfs so that a dv_node is diff --git a/usr/src/uts/common/io/ib/inc.flg b/usr/src/uts/common/io/ib/inc.flg index a7fbf4569a26..c62760b82090 100644 --- a/usr/src/uts/common/io/ib/inc.flg +++ b/usr/src/uts/common/io/ib/inc.flg @@ -83,10 +83,6 @@ find_files "s.*" \ usr/src/uts/intel/asm \ usr/src/uts/intel/amd64 -# lvm -find_files "s.*.x" \ - usr/src/uts/common/sys/lvm - # makefiles echo_file usr/src/Makefile.master echo_file usr/src/Makefile.master.64 @@ -101,7 +97,6 @@ echo_file usr/src/uts/sun/Makefile.rules echo_file usr/src/uts/common/Makefile.files echo_file usr/src/uts/common/Makefile.rules echo_file usr/src/uts/common/sys/Makefile -echo_file usr/src/uts/common/sys/lvm/Makefile echo_file usr/src/uts/sparc/Makefile echo_file usr/src/uts/sparc/Makefile.files echo_file usr/src/uts/sparc/Makefile.rules diff --git a/usr/src/uts/common/io/lvm/hotspares/hotspares.c b/usr/src/uts/common/io/lvm/hotspares/hotspares.c deleted file mode 100644 index 13ce78287d03..000000000000 --- a/usr/src/uts/common/io/lvm/hotspares/hotspares.c +++ /dev/null @@ -1,1809 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -md_ops_t hotspares_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &hotspares_md_ops; -#endif - -extern md_ops_t **md_ops; -extern md_ops_t *md_opslist; -extern md_set_t md_set[]; - -extern kmutex_t md_mx; /* used to md global stuff */ -extern kcondvar_t md_cv; /* md_status events */ -extern int md_status; - -extern void md_clear_hot_spare_interface(); - -static void -set_hot_spare_state(hot_spare_t *hs, hotspare_states_t newstate) -{ - hs->hs_state = newstate; - uniqtime32(&hs->hs_timestamp); -} - -static hot_spare_t * -lookup_hot_spare(set_t setno, mddb_recid_t hs_id, int must_exist) -{ - hot_spare_t *hs; - - for (hs = (hot_spare_t *)md_set[setno].s_hs; hs; hs = hs->hs_next) { - if (hs->hs_record_id == hs_id) - return (hs); - } - if (must_exist) - ASSERT(0); - - return ((hot_spare_t *)NULL); -} - - -static int -seths_create_hsp(set_hs_params_t *shs) -{ - hot_spare_pool_t *hsp; - mddb_recid_t recid; - set_t setno; - mddb_type_t typ1; - - setno = HSP_SET(shs->shs_hot_spare_pool); - - /* Scan the hot spare pool list */ - hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool); - if (hsp != (hot_spare_pool_t *)0) - return (0); - - typ1 = (mddb_type_t)md_getshared_key(setno, - hotspares_md_ops.md_driver.md_drivername); - - /* create a hot spare pool record */ - if (shs->shs_options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE, - shs->shs_hot_spare_pool)); -#else - recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1, - HSP_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, - setno); -#endif - } else { - recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1, - HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, - setno); - } - - if (recid < 0) { - return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE, - shs->shs_hot_spare_pool)); - } - - /* get the record addr */ - hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, sizeof (*hsp), - HSP_ONDSK_STR_OFF); - - hsp->hsp_self_id = shs->shs_hot_spare_pool; - hsp->hsp_record_id = recid; - hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp; - hsp->hsp_refcount = 0; - hsp->hsp_nhotspares = 0; - hsp->hsp_revision |= MD_FN_META_DEV; - - md_set[setno].s_hsp = (void *) hsp; - - mddb_commitrec_wrapper(recid); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno, - md_expldev(hsp->hsp_self_id)); - - rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER); - hsp->hsp_link.ln_next = hotspares_md_ops.md_head; - hsp->hsp_link.ln_setno = setno; - hsp->hsp_link.ln_id = hsp->hsp_self_id; - hotspares_md_ops.md_head = &hsp->hsp_link; - rw_exit(&hotspares_md_ops.md_link_rw.lock); - - return (0); -} - - -static int -seths_add(set_hs_params_t *shs) -{ - hot_spare_t *hs; - hot_spare_pool_t *hsp; - hot_spare_pool_t *prev_hsp; - hot_spare_pool_t *new_hsp; - hot_spare_pool_t *old_hsp; - md_create_rec_option_t options; - mddb_recid_t recid; - mddb_recid_t recids[5]; - size_t new_size; - int i; - int delete_hsp = 0; - int irecid; - set_t setno; - mddb_type_t typ1; - int hsp_created = 0; - mdkey_t key_old; - int num_keys_old = 0; - - /* Not much to do here in case of a dryrun */ - if (shs->shs_options & HS_OPT_DRYRUN) { - return (0); - } - - /* create an empty hot spare pool */ - if (shs->shs_options & HS_OPT_POOL) { - return (seths_create_hsp(shs)); - } - - setno = HSP_SET(shs->shs_hot_spare_pool); - typ1 = (mddb_type_t)md_getshared_key(setno, - hotspares_md_ops.md_driver.md_drivername); - - /* Scan the hot spare list */ - hs = (hot_spare_t *)md_set[setno].s_hs; - while (hs) { - if (hs->hs_devnum == shs->shs_component_old) { - break; - } - hs = hs->hs_next; - } - - if (hs == NULL) { - /* - * Did not find match for device using devnum so use - * key associated with shs_component_old just - * in case there is a match but the match's dev is NODEV. - * If unable to find a unique key for shs_component_old - * then fail since namespace has multiple entries - * for this old component and we shouldn't allow - * an addition of a hotspare in this case. - */ - if (md_getkeyfromdev(setno, mddb_getsidenum(setno), - shs->shs_component_old, &key_old, &num_keys_old) != 0) { - return (mddeverror(&shs->mde, MDE_NAME_SPACE, - shs->shs_component_old)); - } - - /* - * If more than one key matches given old_dev - fail command - * since shouldn't add new hotspare if namespace has - * multiple entries. - */ - if (num_keys_old > 1) { - return (mddeverror(&shs->mde, MDE_MULTNM, - shs->shs_component_old)); - } - /* - * If there is no key for this entry then fail since - * a key for this entry should exist. - */ - if (num_keys_old == 0) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - /* Scan the hot spare list again */ - hs = (hot_spare_t *)md_set[setno].s_hs; - while (hs) { - /* - * Only need to compare keys when hs_devnum is NODEV. - */ - if ((hs->hs_devnum == NODEV64) && - (hs->hs_key == key_old)) { - break; - } - hs = hs->hs_next; - } - } - - if (hs == NULL) { - /* create a hot spare record */ - if (shs->shs_size_option & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE, - shs->shs_hot_spare_pool, shs->shs_component_old)); -#else - recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC, - MD_CRO_64BIT | MD_CRO_HOTSPARE, setno); -#endif - } else { - recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC, - MD_CRO_32BIT | MD_CRO_HOTSPARE, setno); - } - - if (recid < 0) { - return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE, - shs->shs_hot_spare_pool, - shs->shs_component_old)); - } - - /* get the addr */ - hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs), - 0); - - hs->hs_record_id = recid; - - hs->hs_devnum = shs->shs_component_old; - hs->hs_key = shs->shs_key_old; - hs->hs_start_blk = shs->shs_start_blk; - hs->hs_has_label = shs->shs_has_label; - hs->hs_number_blks = shs->shs_number_blks; - set_hot_spare_state(hs, HSS_AVAILABLE); - hs->hs_refcount = 0; - hs->hs_next = (hot_spare_t *)md_set[setno].s_hs; - md_set[setno].s_hs = (void *) hs; - } - - /* Scan the hot spare pool list */ - hsp = (hot_spare_pool_t *)md_set[setno].s_hsp; - prev_hsp = (hot_spare_pool_t *)0; - while (hsp) { - if (hsp->hsp_self_id == shs->shs_hot_spare_pool) { - break; - } - prev_hsp = hsp; - hsp = hsp->hsp_next; - } - - if (hsp == NULL) { - /* create a hot spare pool record */ - recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), - typ1, HSP_REC, - MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno); - - if (recid < 0) { - return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE, - shs->shs_hot_spare_pool)); - } - - /* get the record addr */ - hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, - sizeof (*hsp), HSP_ONDSK_STR_OFF); - - hsp->hsp_self_id = shs->shs_hot_spare_pool; - hsp->hsp_record_id = recid; - hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp; - hsp->hsp_refcount = 0; - hsp->hsp_nhotspares = 0; - hsp->hsp_revision |= MD_FN_META_DEV; - - /* force prev_hsp to NULL, this will cause hsp to be linked */ - prev_hsp = (hot_spare_pool_t *)0; - - rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER); - hsp->hsp_link.ln_next = hotspares_md_ops.md_head; - hsp->hsp_link.ln_setno = setno; - hsp->hsp_link.ln_id = hsp->hsp_self_id; - hotspares_md_ops.md_head = &hsp->hsp_link; - rw_exit(&hotspares_md_ops.md_link_rw.lock); - hsp_created = 1; - } else { - - /* - * Make sure the hot spare is not already in the pool. - */ - for (i = 0; i < hsp->hsp_nhotspares; i++) - if (hsp->hsp_hotspares[i] == hs->hs_record_id) { - return (mdhserror(&shs->mde, MDE_HS_INUSE, - shs->shs_hot_spare_pool, - hs->hs_devnum)); - } - /* - * Create a new hot spare pool record - * This gives us the one extra hs slot, - * because there is one slot in the - * hot_spare_pool struct - */ - new_size = sizeof (hot_spare_pool_ond_t) + - (sizeof (mddb_recid_t) * hsp->hsp_nhotspares); - - /* - * The Friendly Name status of the new HSP should duplicate - * the status of the existing one. - */ - if (hsp->hsp_revision & MD_FN_META_DEV) { - options = - MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN; - } else { - options = MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL; - } - recid = mddb_createrec(new_size, typ1, HSP_REC, options, setno); - - if (recid < 0) { - return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE, - hsp->hsp_self_id)); - } - new_size = sizeof (hot_spare_pool_t) + - (sizeof (mddb_recid_t) * hsp->hsp_nhotspares); - - /* get the record addr */ - new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, - new_size, HSP_ONDSK_STR_OFF); - - /* copy the old record into the new one */ - bcopy((caddr_t)hsp, (caddr_t)new_hsp, - (size_t)((sizeof (hot_spare_pool_t) + - (sizeof (mddb_recid_t) * hsp->hsp_nhotspares) - - sizeof (mddb_recid_t)))); - new_hsp->hsp_record_id = recid; - - md_rem_link(setno, hsp->hsp_self_id, - &hotspares_md_ops.md_link_rw.lock, - &hotspares_md_ops.md_head); - - rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER); - new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head; - new_hsp->hsp_link.ln_setno = setno; - new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id; - hotspares_md_ops.md_head = &new_hsp->hsp_link; - rw_exit(&hotspares_md_ops.md_link_rw.lock); - - /* mark the old hsp to be deleted */ - delete_hsp = 1; - old_hsp = hsp; - hsp = new_hsp; - } - - if (shs->shs_size_option & MD_CRO_64BIT) { - hs->hs_revision |= MD_64BIT_META_DEV; - } else { - hs->hs_revision &= ~MD_64BIT_META_DEV; - } - - /* lock the db records */ - recids[0] = hs->hs_record_id; - recids[1] = hsp->hsp_record_id; - irecid = 2; - if (delete_hsp) - recids[irecid++] = old_hsp->hsp_record_id; - recids[irecid] = 0; - - /* increment the reference count */ - hs->hs_refcount++; - - /* add the hs at the end of the hot spare pool */ - hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id; - hsp->hsp_nhotspares++; - - /* - * NOTE: We do not commit the previous hot spare pool record. - * There is no need, the link gets rebuilt at boot time. - */ - if (prev_hsp) - prev_hsp->hsp_next = hsp; - else - md_set[setno].s_hsp = (void *) hsp; - - if (delete_hsp) - old_hsp->hsp_self_id = MD_HSP_NONE; - - /* commit the db records */ - mddb_commitrecs_wrapper(recids); - - if (delete_hsp) { - /* delete the old hot spare pool record */ - mddb_deleterec_wrapper(old_hsp->hsp_record_id); - } - - if (hsp_created) { - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno, - md_expldev(hsp->hsp_self_id)); - } - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno, - md_expldev(hsp->hsp_self_id)); - - return (0); -} - - -static int -seths_delete_hsp(set_hs_params_t *shs) -{ - - hot_spare_pool_t *prev_hsp; - hot_spare_pool_t *hsp; - set_t setno; - hsp_t hspid; - - setno = HSP_SET(shs->shs_hot_spare_pool); - - /* Scan the hot spare pool list */ - prev_hsp = (hot_spare_pool_t *)0; - hsp = (hot_spare_pool_t *)md_set[setno].s_hsp; - while (hsp) { - if (hsp->hsp_self_id == shs->shs_hot_spare_pool) { - break; - } - prev_hsp = hsp; - hsp = hsp->hsp_next; - } - - if (hsp == NULL) { - return (mdhsperror(&shs->mde, MDE_INVAL_HSP, - shs->shs_hot_spare_pool)); - } - - if (hsp->hsp_nhotspares != 0) { - return (mdhsperror(&shs->mde, MDE_HSP_BUSY, - shs->shs_hot_spare_pool)); - } - - if (hsp->hsp_refcount != 0) { - return (mdhsperror(&shs->mde, MDE_HSP_REF, - shs->shs_hot_spare_pool)); - } - - /* In case of a dryrun, we're done here */ - if (shs->shs_options & HS_OPT_DRYRUN) { - return (0); - } - /* - * NOTE: We do not commit the previous hot spare pool record. - * There is no need, the link gets rebuilt at boot time. - */ - if (prev_hsp) - prev_hsp->hsp_next = hsp->hsp_next; - else - md_set[setno].s_hsp = (void *) hsp->hsp_next; - - hspid = hsp->hsp_self_id; - - md_rem_link(setno, hsp->hsp_self_id, - &hotspares_md_ops.md_link_rw.lock, - &hotspares_md_ops.md_head); - - mddb_deleterec_wrapper(hsp->hsp_record_id); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno, - md_expldev(hspid)); - return (0); -} - - -static int -seths_delete(set_hs_params_t *shs) -{ - hot_spare_t *hs; - hot_spare_t *prev_hs; - hot_spare_pool_t *hsp; - mddb_recid_t recids[4]; - int i; - set_t setno; - sv_dev_t sv; - int delete_hs = 0; - mdkey_t key_old; - int num_keys_old = 0; - - /* delete the hot spare pool */ - if (shs->shs_options & HS_OPT_POOL) { - return (seths_delete_hsp(shs)); - } - - setno = HSP_SET(shs->shs_hot_spare_pool); - - /* Scan the hot spare list */ - hs = (hot_spare_t *)md_set[setno].s_hs; - prev_hs = (hot_spare_t *)0; - while (hs) { - if (hs->hs_devnum == shs->shs_component_old) { - break; - } - prev_hs = hs; - hs = hs->hs_next; - } - - if (hs == NULL) { - /* - * Unable to find device using devnum so use - * key associated with shs_component_old instead. - * If unable to find a unique key for shs_component_old - * then fail since namespace has multiple entries - * for this old component and we're unable to determine - * which key is the valid match for shs_component_old. - * - * Only need to compare keys when hs_devnum is NODEV. - */ - if (md_getkeyfromdev(setno, mddb_getsidenum(setno), - shs->shs_component_old, &key_old, &num_keys_old) != 0) { - return (mddeverror(&shs->mde, MDE_NAME_SPACE, - shs->shs_component_old)); - } - - /* - * If more than one key matches given old_dev - fail command - * since shouldn't add new hotspare if namespace has - * multiple entries. - */ - if (num_keys_old > 1) { - return (mddeverror(&shs->mde, MDE_MULTNM, - shs->shs_component_old)); - } - /* - * If there is no key for this entry then fail since - * a key for this entry should exist. - */ - if (num_keys_old == 0) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - /* Scan the hot spare list again */ - hs = (hot_spare_t *)md_set[setno].s_hs; - prev_hs = (hot_spare_t *)0; - while (hs) { - /* - * Only need to compare keys when hs_devnum is NODEV. - */ - if ((hs->hs_devnum == NODEV64) && - (hs->hs_key == key_old)) { - break; - } - prev_hs = hs; - hs = hs->hs_next; - } - } - - if (hs == NULL) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - - /* Scan the hot spare pool list */ - hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool); - if (hsp == (hot_spare_pool_t *)0) { - return (mdhsperror(&shs->mde, MDE_INVAL_HSP, - shs->shs_hot_spare_pool)); - } - - /* check for force flag and state of hot spare */ - if (((shs->shs_options & HS_OPT_FORCE) == 0) && - (hs->hs_state == HSS_RESERVED)) { - return (mdhserror(&shs->mde, MDE_HS_RESVD, - shs->shs_hot_spare_pool, shs->shs_component_old)); - } - - if (hsp->hsp_refcount && (hs->hs_state == HSS_RESERVED)) { - return (mdhserror(&shs->mde, MDE_HS_RESVD, - shs->shs_hot_spare_pool, shs->shs_component_old)); - } - - /* - * Make sure the device is in the pool. - */ - for (i = 0; i < hsp->hsp_nhotspares; i++) { - if (hsp->hsp_hotspares[i] == hs->hs_record_id) { - break; - } - } - - if (i >= hsp->hsp_nhotspares) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - hs->hs_devnum)); - } - - /* In case of a dryrun, we're done here */ - if (shs->shs_options & HS_OPT_DRYRUN) { - return (0); - } - - /* lock the db records */ - recids[0] = hs->hs_record_id; - recids[1] = hsp->hsp_record_id; - recids[2] = 0; - - sv.setno = setno; - sv.key = hs->hs_key; - - hs->hs_refcount--; - if (hs->hs_refcount == 0) { - /* - * NOTE: We do not commit the previous hot spare record. - * There is no need, the link we get rebuilt at boot time. - */ - if (prev_hs) { - prev_hs->hs_next = hs->hs_next; - } else - md_set[setno].s_hs = (void *) hs->hs_next; - - /* mark the hot spare to be deleted */ - delete_hs = 1; - recids[0] = hsp->hsp_record_id; - recids[1] = 0; - } - - /* find the location of the hs in the hsp */ - for (i = 0; i < hsp->hsp_nhotspares; i++) { - if (hsp->hsp_hotspares[i] == hs->hs_record_id) - break; - } - - /* remove the hs from the hsp */ - for (i++; i < hsp->hsp_nhotspares; i++) - hsp->hsp_hotspares[i - 1] = hsp->hsp_hotspares[i]; - - hsp->hsp_nhotspares--; - - /* commit the db records */ - mddb_commitrecs_wrapper(recids); - - if (delete_hs) - mddb_deleterec_wrapper(hs->hs_record_id); - - md_rem_names(&sv, 1); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno, - md_expldev(hsp->hsp_self_id)); - - return (0); -} - -static int -seths_replace(set_hs_params_t *shs) -{ - hot_spare_t *hs; - hot_spare_t *prev_hs; - hot_spare_t *new_hs; - hot_spare_pool_t *hsp; - int new_found = 0; - mddb_recid_t recid; - mddb_recid_t recids[5]; - int i; - sv_dev_t sv; - int delete_hs = 0; - set_t setno; - mddb_type_t typ1; - mdkey_t key_old; - int num_keys_old = 0; - - setno = HSP_SET(shs->shs_hot_spare_pool); - typ1 = (mddb_type_t)md_getshared_key(setno, - hotspares_md_ops.md_driver.md_drivername); - - /* Scan the hot spare list */ - hs = (hot_spare_t *)md_set[setno].s_hs; - prev_hs = (hot_spare_t *)0; - while (hs) { - if (hs->hs_devnum == shs->shs_component_old) { - break; - } - prev_hs = hs; - hs = hs->hs_next; - } - - if (hs == NULL) { - /* - * Unable to find device using devnum so use - * key associated with shs_component_old instead. - * If unable to find a unique key for shs_component_old - * then fail since namespace has multiple entries - * for this old component and we're unable to determine - * which key is the valid match for shs_component_old. - * - * Only need to compare keys when hs_devnum is NODEV. - */ - if (md_getkeyfromdev(setno, mddb_getsidenum(setno), - shs->shs_component_old, &key_old, &num_keys_old) != 0) { - return (mddeverror(&shs->mde, MDE_NAME_SPACE, - shs->shs_component_old)); - } - - /* - * If more than one key matches given old_dev - fail command - * since unable to determine which key is correct. - */ - if (num_keys_old > 1) { - return (mddeverror(&shs->mde, MDE_MULTNM, - shs->shs_component_old)); - } - /* - * If there is no key for this entry then fail since - * a key for this entry should exist. - */ - if (num_keys_old == 0) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - /* Scan the hot spare list again */ - hs = (hot_spare_t *)md_set[setno].s_hs; - prev_hs = (hot_spare_t *)0; - while (hs) { - /* - * Only need to compare keys when hs_devnum is NODEV. - */ - if ((hs->hs_devnum == NODEV64) && - (hs->hs_key == key_old)) { - break; - } - prev_hs = hs; - hs = hs->hs_next; - } - } - - if (hs == NULL) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - - /* check the force flag and the state of the hot spare */ - if (((shs->shs_options & HS_OPT_FORCE) == 0) && - (hs->hs_state == HSS_RESERVED)) { - return (mdhserror(&shs->mde, MDE_HS_RESVD, - shs->shs_hot_spare_pool, - hs->hs_devnum)); - } - - /* Scan the hot spare pool list */ - hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool); - if (hsp == (hot_spare_pool_t *)0) { - return (mdhsperror(&shs->mde, MDE_INVAL_HSP, - shs->shs_hot_spare_pool)); - } - - /* - * Make sure the old device is in the pool. - */ - for (i = 0; i < hsp->hsp_nhotspares; i++) { - if (hsp->hsp_hotspares[i] == hs->hs_record_id) { - break; - } - } - if (i >= hsp->hsp_nhotspares) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - hs->hs_devnum)); - } - - /* Scan the hot spare list for the new hs */ - new_hs = (hot_spare_t *)md_set[setno].s_hs; - new_found = 0; - while (new_hs) { - if (new_hs->hs_devnum == shs->shs_component_new) { - new_found = 1; - break; - } - new_hs = new_hs->hs_next; - } - - /* - * Make sure the new device is not already in the pool. - * We don't have to search the hs in this hsp, if the - * new hs was just created. Only if the hot spare was found. - */ - if (new_found) { - for (i = 0; i < hsp->hsp_nhotspares; i++) - if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) { - return (mdhserror(&shs->mde, MDE_HS_INUSE, - shs->shs_hot_spare_pool, - new_hs->hs_devnum)); - } - } - - /* In case of a dryrun, we're done here */ - if (shs->shs_options & HS_OPT_DRYRUN) { - return (0); - } - - /* - * Create the new hotspare - */ - if (!new_found) { - /* create a hot spare record */ - if (shs->shs_size_option & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE, - shs->shs_hot_spare_pool, shs->shs_component_new)); -#else - recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC, - MD_CRO_64BIT | MD_CRO_HOTSPARE, setno); -#endif - } else { - recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC, - MD_CRO_32BIT | MD_CRO_HOTSPARE, setno); - } - - if (recid < 0) { - return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE, - shs->shs_hot_spare_pool, - shs->shs_component_new)); - } - - /* get the addr */ - new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, - sizeof (*new_hs), 0); - - new_hs->hs_record_id = recid; - new_hs->hs_devnum = shs->shs_component_new; - new_hs->hs_key = shs->shs_key_new; - new_hs->hs_start_blk = shs->shs_start_blk; - new_hs->hs_has_label = shs->shs_has_label; - new_hs->hs_number_blks = shs->shs_number_blks; - set_hot_spare_state(new_hs, HSS_AVAILABLE); - new_hs->hs_refcount = 0; - new_hs->hs_isopen = 1; - } - - /* lock the db records */ - recids[0] = hs->hs_record_id; - recids[1] = new_hs->hs_record_id; - recids[2] = hsp->hsp_record_id; - recids[3] = 0; - - sv.setno = setno; - sv.key = hs->hs_key; - - hs->hs_refcount--; - if (hs->hs_refcount == 0) { - /* - * NOTE: We do not commit the previous hot spare record. - * There is no need, the link we get rebuilt at boot time. - */ - if (prev_hs) { - prev_hs->hs_next = hs->hs_next; - } else - md_set[setno].s_hs = (void *) hs->hs_next; - - /* mark hs to be deleted in the correct order */ - delete_hs = 1; - - recids[0] = new_hs->hs_record_id; - recids[1] = hsp->hsp_record_id; - recids[2] = 0; - } - - /* link into the hs list */ - new_hs->hs_refcount++; - if (!new_found) { - /* do this AFTER the old dev is possibly removed */ - new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs; - md_set[setno].s_hs = (void *) new_hs; - } - - /* find the location of the old hs in the hsp */ - for (i = 0; i < hsp->hsp_nhotspares; i++) { - if (hsp->hsp_hotspares[i] == hs->hs_record_id) { - hsp->hsp_hotspares[i] = new_hs->hs_record_id; - break; - } - } - - if (shs->shs_size_option & MD_CRO_64BIT) { - new_hs->hs_revision |= MD_64BIT_META_DEV; - } else { - new_hs->hs_revision &= ~MD_64BIT_META_DEV; - } - - /* commit the db records */ - mddb_commitrecs_wrapper(recids); - - if (delete_hs) - mddb_deleterec_wrapper(hs->hs_record_id); - - md_rem_names(&sv, 1); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno, - md_expldev(hsp->hsp_self_id)); - return (0); -} - -static int -seths_enable(set_hs_params_t *shs) -{ - hot_spare_t *hs; - mddb_recid_t recids[2]; - set_t setno = shs->md_driver.md_setno; - mdkey_t key_old; - int num_keys_old = 0; - - - /* - * Find device by using key associated with shs_component_old. - * If unable to find a unique key for shs_component_old - * then fail since namespace has multiple entries - * for this old component and we're unable to determine - * which key is the valid match for shs_component_old. - * This failure keeps a hotspare from being enabled on a slice - * that may already be in use by another metadevice. - */ - if (md_getkeyfromdev(setno, mddb_getsidenum(setno), - shs->shs_component_old, &key_old, &num_keys_old) != 0) { - return (mddeverror(&shs->mde, MDE_NAME_SPACE, - shs->shs_component_old)); - } - - /* - * If more than one key matches given old_dev - fail command - * since unable to determine which key is correct. - */ - if (num_keys_old > 1) { - return (mddeverror(&shs->mde, MDE_MULTNM, - shs->shs_component_old)); - } - /* - * If there is no key for this entry then fail since - * a key for this entry should exist. - */ - if (num_keys_old == 0) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - - /* Scan the hot spare list for the hs */ - hs = (hot_spare_t *)md_set[setno].s_hs; - while (hs) { - /* - * Since component may or may not be currently in the system, - * use the keys to find a match (not the devt). - */ - if (hs->hs_key == key_old) { - break; - } - hs = hs->hs_next; - } - - if (hs == NULL) { - return (mddeverror(&shs->mde, MDE_INVAL_HS, - shs->shs_component_old)); - } - - /* make sure it's broken */ - if (hs->hs_state != HSS_BROKEN) { - return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE, - hs->hs_devnum)); - } - - /* In case of a dryrun, we're done here */ - if (shs->shs_options & HS_OPT_DRYRUN) { - return (0); - } - - /* fix it */ - set_hot_spare_state(hs, HSS_AVAILABLE); - hs->hs_start_blk = shs->shs_start_blk; - hs->hs_has_label = shs->shs_has_label; - hs->hs_number_blks = shs->shs_number_blks; - - /* commit the db records */ - recids[0] = hs->hs_record_id; - recids[1] = 0; - mddb_commitrecs_wrapper(recids); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno, - shs->shs_component_old); - - return (0); -} - -static int -get_hs( - get_hs_params_t *ghs -) -{ - hot_spare_t *hs; - set_t setno = ghs->md_driver.md_setno; - - mdclrerror(&ghs->mde); - - /* Scan the hot spare list for the hs */ - hs = (hot_spare_t *)md_set[setno].s_hs; - while (hs) { - if (hs->hs_key == ghs->ghs_key) { - break; - } - hs = hs->hs_next; - } - - if (hs == NULL) { - return (mddeverror(&ghs->mde, MDE_INVAL_HS, - ghs->ghs_devnum)); - } - - ghs->ghs_start_blk = hs->hs_start_blk; - ghs->ghs_number_blks = hs->hs_number_blks; - ghs->ghs_state = hs->hs_state; - ghs->ghs_timestamp = hs->hs_timestamp; - ghs->ghs_revision = hs->hs_revision; - return (0); -} - -static void -build_key_list(set_t setno, hot_spare_pool_t *hsp, mdkey_t *list) -{ - int i; - - for (i = 0; i < hsp->hsp_nhotspares; i++) { - hot_spare_t *hs; - hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1); - list[i] = hs->hs_key; - } -} - -static int -get_hsp( - void *d, - int mode -) -{ - hot_spare_pool_t *hsp; - get_hsp_t *ghsp; - size_t size; - set_t setno; - int err = 0; - md_i_get_t *migp = (md_i_get_t *)d; - - - setno = migp->md_driver.md_setno; - - mdclrerror(&migp->mde); - - /* Scan the hot spare pool list */ - hsp = find_hot_spare_pool(setno, migp->id); - if (hsp == NULL) { - return (mdhsperror(&migp->mde, MDE_INVAL_HSP, - migp->id)); - } - - size = (sizeof (ghsp->ghsp_hs_keys[0]) * (hsp->hsp_nhotspares - 1)) + - sizeof (get_hsp_t); - - if (migp->size == 0) { - migp->size = (int)size; - return (0); - } - - if (migp->size < size) - return (EFAULT); - - ghsp = kmem_alloc(size, KM_SLEEP); - - ghsp->ghsp_id = hsp->hsp_self_id; - ghsp->ghsp_refcount = hsp->hsp_refcount; - ghsp->ghsp_nhotspares = hsp->hsp_nhotspares; - build_key_list(setno, hsp, ghsp->ghsp_hs_keys); - if (ddi_copyout(ghsp, (caddr_t)(uintptr_t)migp->mdp, size, mode)) - err = EFAULT; - kmem_free(ghsp, size); - return (err); -} - -static int -set_hs( - set_hs_params_t *shs -) -{ - mdclrerror(&shs->mde); - - if (md_get_setstatus(shs->md_driver.md_setno) & MD_SET_STALE) - return (mdmddberror(&shs->mde, MDE_DB_STALE, NODEV32, - shs->md_driver.md_setno)); - - switch (shs->shs_cmd) { - case ADD_HOT_SPARE: - return (seths_add(shs)); - case DELETE_HOT_SPARE: - return (seths_delete(shs)); - case REPLACE_HOT_SPARE: - return (seths_replace(shs)); - case FIX_HOT_SPARE: - return (seths_enable(shs)); - default: - return (mderror(&shs->mde, MDE_INVAL_HSOP)); - } -} - -static void -hotspares_poke_hotspares(void) -{ - intptr_t (*poke_hs)(); - int i; - - for (i = 0; i < MD_NOPS; i++) { - /* handle change */ - poke_hs = md_get_named_service(NODEV64, i, "poke hotspares", 0); - if (poke_hs) - (void) (*poke_hs)(); - } -} - - -/*ARGSUSED4*/ -static int -hotspares_ioctl( - dev_t dev, - int cmd, - void *data, - int mode, - IOLOCK *lockp -) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* single thread */ - if (getminor(dev) != MD_ADM_MINOR) - return (ENOTTY); - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - - mutex_enter(&md_mx); - while (md_status & MD_GBL_HS_LOCK) - cv_wait(&md_cv, &md_mx); - md_status |= MD_GBL_HS_LOCK; - mutex_exit(&md_mx); - - /* dispatch ioctl */ - switch (cmd) { - - case MD_IOCSET_HS: /* setup hot spares and pools */ - { - if (! (mode & FWRITE)) { - err = EACCES; - break; - } - - sz = sizeof (set_hs_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = set_hs(d); - break; - } - - case MD_IOCGET_HS: /* get hot spare info */ - { - if (! (mode & FREAD)) { - err = EACCES; - break; - } - - sz = sizeof (get_hs_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = get_hs(d); - break; - } - - case MD_IOCGET: /* get hot spare pool info */ - { - if (! (mode & FREAD)) { - err = EACCES; - break; - } - - sz = sizeof (md_i_get_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = get_hsp(d, mode); - break; - } - - default: - err = ENOTTY; - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - kmem_free(d, sz); - } - - /* un single thread */ - mutex_enter(&md_mx); - md_status &= ~MD_GBL_HS_LOCK; - cv_broadcast(&md_cv); - mutex_exit(&md_mx); - - /* handle change */ - hotspares_poke_hotspares(); - - /* return success */ - return (err); -} - - -static void -load_hotspare(set_t setno, mddb_recid_t recid) -{ - hot_spare_t *hs; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - size_t newreqsize; - hot_spare_t *b_hs; - hot_spare32_od_t *s_hs; - - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_HOTSPARE; - rbp = dep->de_rb; - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * Needs to convert to internal 64 bit - */ - s_hs = (hot_spare32_od_t *)mddb_getrecaddr(recid); - newreqsize = sizeof (hot_spare_t); - b_hs = (hot_spare_t *)kmem_zalloc(newreqsize, KM_SLEEP); - hs_convert((caddr_t)s_hs, (caddr_t)b_hs, SMALL_2_BIG); - kmem_free(s_hs, dep->de_reqsize); - dep->de_rb_userdata = b_hs; - dep->de_reqsize = newreqsize; - hs = b_hs; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - hs = (hot_spare_t *)mddb_getrecaddr_resize - (recid, sizeof (*hs), 0); - break; - } - MDDB_NOTE_FN(rbp->rb_revision, hs->hs_revision); - -#if defined(_ILP32) - if (hs->hs_revision & MD_64BIT_META_DEV) { - char devname[MD_MAX_CTDLEN]; - - set_hot_spare_state(hs, HSS_BROKEN); - (void) md_devname(setno, hs->hs_devnum, devname, - sizeof (devname)); - cmn_err(CE_NOTE, "%s is unavailable because 64 bit hotspares " - "are not accessible on a 32 bit kernel\n", devname); - } -#endif - - ASSERT(hs != NULL); - - if (hs->hs_refcount == 0) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - return; - } - - hs->hs_next = (hot_spare_t *)md_set[setno].s_hs; - md_set[setno].s_hs = (void *)hs; - - hs->hs_isopen = 0; - - hs->hs_devnum = md_getdevnum(setno, mddb_getsidenum(setno), - hs->hs_key, MD_NOTRUST_DEVT); -} - - -static void -load_hotsparepool(set_t setno, mddb_recid_t recid) -{ - hot_spare_pool_t *hsp; - hot_spare_pool_ond_t *hsp_ond; - size_t hsp_icsize; - - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid); - ASSERT(hsp_ond != NULL); - - if (hsp_ond->hsp_self_id == MD_HSP_NONE) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - return; - } - - hsp_icsize = HSP_ONDSK_STR_OFF + mddb_getrecsize(recid); - - hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, hsp_icsize, - HSP_ONDSK_STR_OFF); - hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp; - md_set[setno].s_hsp = (void *) hsp; - - rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER); - hsp->hsp_link.ln_next = hotspares_md_ops.md_head; - hsp->hsp_link.ln_setno = setno; - hsp->hsp_link.ln_id = hsp->hsp_self_id; - hotspares_md_ops.md_head = &hsp->hsp_link; - rw_exit(&hotspares_md_ops.md_link_rw.lock); -} - -static int -hotspares_snarf(md_snarfcmd_t cmd, set_t setno) -{ - mddb_recid_t recid; - int gotsomething; - mddb_type_t typ1; - - if (cmd == MD_SNARF_CLEANUP) - return (0); - - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - hotspares_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - switch (mddb_getrectype2(recid)) { - case HSP_REC: - load_hotsparepool(setno, recid); - gotsomething = 1; - break; - case HS_REC: - load_hotspare(setno, recid); - gotsomething = 1; - break; - default: - ASSERT(0); - } - } - - if (gotsomething) - return (gotsomething); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - return (0); -} - -static int -hotspares_halt(md_haltcmd_t cmd, set_t setno) -{ - hot_spare_t *hs, **p_hs; - hot_spare_pool_t *hsp, **p_hsp; - - if (cmd == MD_HALT_CLOSE) - return (0); - - if (cmd == MD_HALT_OPEN) - return (0); - - if (cmd == MD_HALT_CHECK) - return (0); - - if (cmd == MD_HALT_UNLOAD) - return (0); - - if (cmd != MD_HALT_DOIT) - return (1); - /* - * Find all the hotspares for set "setno" - * and remove them from the hot_spare_list. - */ - p_hs = (hot_spare_t **)&md_set[setno].s_hs; - hs = (hot_spare_t *)md_set[setno].s_hs; - for (; hs != NULL; hs = *p_hs) - *p_hs = hs->hs_next; - - /* - * Find all the hotspare pools for set "setno" - * and remove them from the hot_spare_pools list. - * Also remove from the get_next list. - */ - p_hsp = (hot_spare_pool_t **)&md_set[setno].s_hsp; - hsp = (hot_spare_pool_t *)md_set[setno].s_hsp; - for (; hsp != NULL; hsp = *p_hsp) { - md_rem_link(setno, hsp->hsp_self_id, - &hotspares_md_ops.md_link_rw.lock, - &hotspares_md_ops.md_head); - *p_hsp = hsp->hsp_next; - } - - return (0); -} - -static hot_spare_t * -usable_hs( - set_t setno, - mddb_recid_t hs_id, - diskaddr_t nblks, - int labeled, - diskaddr_t *start) -{ - hot_spare_t *hs; - - hs = lookup_hot_spare(setno, hs_id, 1); - - if (hs->hs_state != HSS_AVAILABLE) - return ((hot_spare_t *)0); - - if (labeled && hs->hs_has_label && (hs->hs_number_blks >= nblks)) { - *start = 0; - return (hs); - } else if ((hs->hs_number_blks - hs->hs_start_blk) >= nblks) { - *start = hs->hs_start_blk; - return (hs); - } - return ((hot_spare_t *)0); -} - -static int -reserve_a_hs( - set_t setno, - mddb_recid_t id, - uint64_t size, - int labeled, - mddb_recid_t *hs_id, - mdkey_t *key, - md_dev64_t *dev, - diskaddr_t *sblock) -{ - hot_spare_pool_t *hsp; - hot_spare_t *hs; - int i; - - *hs_id = 0; - - hsp = find_hot_spare_pool(setno, id); - if (hsp == NULL) - return (-1); - - for (i = 0; i < hsp->hsp_nhotspares; i++) { - hs = usable_hs(setno, hsp->hsp_hotspares[i], - size, labeled, sblock); - if (hs == NULL) - continue; - - set_hot_spare_state(hs, HSS_RESERVED); - *hs_id = hs->hs_record_id; - *key = hs->hs_key; - *dev = hs->hs_devnum; - /* NOTE: Mirror code commits the hs record */ - return (0); - } - - return (-1); -} - - -/* ARGSUSED3 */ -static int -return_a_hs( - set_t setno, - mddb_recid_t id, - mddb_recid_t *hs_id, - mdkey_t key, - diskaddr_t sblock, - uint64_t size, - hotspare_states_t new_state) -{ - hot_spare_pool_t *hsp; - hot_spare_t *hs; - int i; - - /* - * NOTE: sblock/size are not currently being used. - * That is because we always allocate the whole hs. - * Later if we choose to allocate only what is needed - * then the sblock/size can be used to determine - * which part is being unreseved. - */ - *hs_id = 0; - - hsp = find_hot_spare_pool(setno, id); - if (hsp == NULL) - return (-1); - - for (i = 0; i < hsp->hsp_nhotspares; i++) { - hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1); - if (hs->hs_key != key) - continue; - - set_hot_spare_state(hs, new_state); - *hs_id = hs->hs_record_id; - if (new_state == HSS_BROKEN) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS, - setno, hs->hs_devnum); - } - if (new_state == HSS_AVAILABLE) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, - setno, hs->hs_devnum); - } - - /* NOTE: Mirror/Raid code commits the hs record */ - return (0); - } - - return (-1); -} - - -static int -modify_hsp_ref(set_t setno, mddb_recid_t id, int incref, mddb_recid_t *hsp_id) -{ - hot_spare_pool_t *hsp; - - *hsp_id = 0; - - if (id < 0) - return (0); - - hsp = find_hot_spare_pool(setno, id); - if (hsp == NULL) - return (-1); - - if (incref) - hsp->hsp_refcount++; - else - hsp->hsp_refcount--; - - *hsp_id = hsp->hsp_record_id; - - /* NOTE: Stripe code commits the hsp record */ - return (0); -} - - -static int -mkdev_for_a_hs(mddb_recid_t hs_id, md_dev64_t *dev) -{ - hot_spare_t *hs; - - hs = lookup_hot_spare(mddb_getsetnum(hs_id), hs_id, 0); - if (hs == NULL) - return (0); - - *dev = hs->hs_devnum; - return (0); -} - -static intptr_t -hotspares_interface( - hs_cmds_t cmd, - mddb_recid_t id, - uint64_t size, - int bool, - mddb_recid_t *hs_id, - mdkey_t *key, - md_dev64_t *dev, - diskaddr_t *sblock) -{ - set_t setno; - int err = -1; - - mutex_enter(&md_mx); - while (md_status & MD_GBL_HS_LOCK) - cv_wait(&md_cv, &md_mx); - - /* If md_halt has been run do not continue */ - if (md_status & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) { - mutex_exit(&md_mx); - return (ENXIO); - } - - md_status |= MD_GBL_HS_LOCK; - mutex_exit(&md_mx); - - setno = mddb_getsetnum(id); - - switch (cmd) { - case HS_GET: - err = reserve_a_hs(setno, id, size, bool, hs_id, - key, dev, sblock); - break; - case HS_FREE: - err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_AVAILABLE); - hotspares_poke_hotspares(); - break; - case HS_BAD: - err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_BROKEN); - break; - case HSP_INCREF: - err = modify_hsp_ref(setno, id, 1, hs_id); - break; - case HSP_DECREF: - err = modify_hsp_ref(setno, id, 0, hs_id); - break; - case HS_MKDEV: - err = mkdev_for_a_hs(*hs_id, dev); - break; - } - - mutex_enter(&md_mx); - md_status &= ~MD_GBL_HS_LOCK; - cv_broadcast(&md_cv); - mutex_exit(&md_mx); - - return (err); -} - -static void -imp_hotsparepool( - set_t setno, - mddb_recid_t recid -) -{ - hot_spare_pool_ond_t *hsp_ond; - mddb_recid_t *hsp_recid, *hs_recid; - int i; - uint_t *hsp_selfid; - - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid); - hsp_recid = &(hsp_ond->hsp_record_id); - hsp_selfid = &(hsp_ond->hsp_self_id); - /* - * Fixup the pool and hotspares - */ - *hsp_recid = MAKERECID(setno, DBID(*hsp_recid)); - *hsp_selfid = MAKERECID(setno, DBID(*hsp_selfid)); - - for (i = 0; i < hsp_ond->hsp_nhotspares; i++) { - hs_recid = &(hsp_ond->hsp_hotspares[i]); - *hs_recid = MAKERECID(setno, DBID(*hs_recid)); - } -} - -static void -imp_hotspare( - set_t setno, - mddb_recid_t recid -) -{ - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - hot_spare_t *hs64; - hot_spare32_od_t *hs32; - mddb_recid_t *hs_recid; - - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - dep = mddb_getrecdep(recid); - rbp = dep->de_rb; - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * 32 bit hotspare - */ - hs32 = (hot_spare32_od_t *)mddb_getrecaddr(recid); - hs_recid = &(hs32->hs_record_id); - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - hs64 = (hot_spare_t *)mddb_getrecaddr(recid); - hs_recid = &(hs64->hs_record_id); - break; - } - - /* - * Fixup the setno - */ - *hs_recid = MAKERECID(setno, DBID(*hs_recid)); -} - -static int -hotspares_imp_set( - set_t setno -) -{ - mddb_recid_t recid; - int gotsomething; - mddb_type_t typ1; - - - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - hotspares_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - switch (mddb_getrectype2(recid)) { - case HSP_REC: - imp_hotsparepool(setno, recid); - gotsomething = 1; - break; - case HS_REC: - imp_hotspare(setno, recid); - gotsomething = 1; - break; - default: - ASSERT(0); - } - } - - return (gotsomething); -} - -static md_named_services_t hotspares_named_services[] = { - {hotspares_interface, "hot spare interface"}, - {NULL, 0} -}; - -md_ops_t hotspares_md_ops = { - NULL, /* open */ - NULL, /* close */ - NULL, /* strategy */ - NULL, /* print */ - NULL, /* dump */ - NULL, /* read */ - NULL, /* write */ - hotspares_ioctl, /* hotspares_ioctl, */ - hotspares_snarf, /* hotspares_snarf */ - hotspares_halt, /* halt */ - NULL, /* aread */ - NULL, /* awrite */ - hotspares_imp_set, /* import set */ - hotspares_named_services /* named_services */ -}; - -static void -fini_uninit() -{ - /* prevent access to services that may have been imported */ - md_clear_hot_spare_interface(); -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("hot spares module", md_noop, fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/md/Makefile b/usr/src/uts/common/io/lvm/md/Makefile deleted file mode 100644 index 7d568aab8449..000000000000 --- a/usr/src/uts/common/io/lvm/md/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Copyright 2013 Nexenta Systems, Inc. All rights reserved. -# Copyright 2015 Igor Kozhukhov -# - -# Path to the base of the uts directory tree (usually /usr/src/uts). -UTSBASE = ../../../.. - -# -# Include common rules. -# -include $(SRC)/Makefile.master - -MD_XDR_CSRC = $(UTSBASE)/common/io/lvm/md -MD_XDR_XSRC = $(UTSBASE)/common/sys/lvm -RPCGENFLAGS += -C -M -D_KERNEL -DSYSV - -DERIVED_FILES = meta_basic_xdr.c metamed_xdr.c - -all_h install_h : $(DERIVED_FILES) - -check: - -clean: - $(RM) $(DERIVED_FILES) - -clobber: clean - -%_xdr.c: $(MD_XDR_XSRC)/%.x - $(RPCGEN) $(RPCGENFLAGS) -c -i 100 $(MD_XDR_XSRC)/$*.x | \ -$(AWK) '/^#include/ {\ - sub(/(\.\.\/)*common\//,"");\ - sub(/meta_basic.h/, "md_basic.h");\ - sub(/metamed.h/, "mdmed.h");\ -}\ -{ print $0; }' > $@ - -.KEEP_STATE: diff --git a/usr/src/uts/common/io/lvm/md/inc.flg b/usr/src/uts/common/io/lvm/md/inc.flg deleted file mode 100644 index 8dcabcae4871..000000000000 --- a/usr/src/uts/common/io/lvm/md/inc.flg +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. - -find_files "s.*" usr/src/common/lvm diff --git a/usr/src/uts/common/io/lvm/md/md.c b/usr/src/uts/common/io/lvm/md/md.c deleted file mode 100644 index 4fcd89708a68..000000000000 --- a/usr/src/uts/common/io/lvm/md/md.c +++ /dev/null @@ -1,2019 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -/* - * Md - is the meta-disk driver. It sits below the UFS file system - * but above the 'real' disk drivers, xy, id, sd etc. - * - * To the UFS software, md looks like a normal driver, since it has - * the normal kinds of entries in the bdevsw and cdevsw arrays. So - * UFS accesses md in the usual ways. In particular, the strategy - * routine, mdstrategy(), gets called by fbiwrite(), ufs_getapage(), - * and ufs_writelbn(). - * - * Md maintains an array of minor devices (meta-partitions). Each - * meta partition stands for a matrix of real partitions, in rows - * which are not necessarily of equal length. Md maintains a table, - * with one entry for each meta-partition, which lists the rows and - * columns of actual partitions, and the job of the strategy routine - * is to translate from the meta-partition device and block numbers - * known to UFS into the actual partitions' device and block numbers. - * - * See below, in mdstrategy(), mdreal(), and mddone() for details of - * this translation. - */ - -/* - * Driver for Virtual Disk. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int md_init_debug = 0; /* module binding debug */ - -/* - * Tunable to turn off the failfast behavior. - */ -int md_ff_disable = 0; - -/* - * dynamically allocated list of non FF driver names - needs to - * be freed when md is detached. - */ -char **non_ff_drivers = NULL; - -md_krwlock_t md_unit_array_rw; /* protects all unit arrays */ -md_krwlock_t nm_lock; /* protects all the name spaces */ - -md_resync_t md_cpr_resync; - -extern char svm_bootpath[]; -#define SVM_PSEUDO_STR "/pseudo/md@0:" - -#define VERSION_LENGTH 6 -#define VERSION "1.0" - -/* - * Keep track of possible 'orphan' entries in the name space - */ -int *md_nm_snarfed = NULL; - -/* - * Global tunable giving the percentage of free space left in replica during - * conversion of non-devid style replica to devid style replica. - */ -int md_conv_perc = MDDB_DEVID_CONV_PERC; - -#ifdef DEBUG -/* debug code to verify framework exclusion guarantees */ -int md_in; -kmutex_t md_in_mx; /* used to md global stuff */ -#define IN_INIT 0x01 -#define IN_FINI 0x02 -#define IN_ATTACH 0x04 -#define IN_DETACH 0x08 -#define IN_OPEN 0x10 -#define MD_SET_IN(x) { \ - mutex_enter(&md_in_mx); \ - if (md_in) \ - debug_enter("MD_SET_IN exclusion lost"); \ - if (md_in & x) \ - debug_enter("MD_SET_IN already set"); \ - md_in |= x; \ - mutex_exit(&md_in_mx); \ -} - -#define MD_CLR_IN(x) { \ - mutex_enter(&md_in_mx); \ - if (md_in & ~(x)) \ - debug_enter("MD_CLR_IN exclusion lost"); \ - if (!(md_in & x)) \ - debug_enter("MD_CLR_IN already clr"); \ - md_in &= ~x; \ - mutex_exit(&md_in_mx); \ -} -#else /* DEBUG */ -#define MD_SET_IN(x) -#define MD_CLR_IN(x) -#endif /* DEBUG */ -hrtime_t savetime1, savetime2; - - -/* - * list things protected by md_mx even if they aren't - * used in this file. - */ -kmutex_t md_mx; /* used to md global stuff */ -kcondvar_t md_cv; /* md_status events */ -int md_status = 0; /* global status for the meta-driver */ -int md_num_daemons = 0; -int md_ioctl_cnt = 0; -int md_mtioctl_cnt = 0; /* multithreaded ioctl cnt */ -uint_t md_mdelay = 10; /* variable so can be patched */ - -int (*mdv_strategy_tstpnt)(buf_t *, int, void*); - -major_t md_major, md_major_targ; - -unit_t md_nunits = MD_MAXUNITS; -set_t md_nsets = MD_MAXSETS; -int md_nmedh = 0; -char *md_med_trans_lst = NULL; -md_set_t md_set[MD_MAXSETS]; -md_set_io_t md_set_io[MD_MAXSETS]; - -md_krwlock_t hsp_rwlp; /* protects hot_spare_interface */ -md_krwlock_t ni_rwlp; /* protects notify_interface */ -md_ops_t **md_ops = NULL; -ddi_modhandle_t *md_mods = NULL; -md_ops_t *md_opslist; -clock_t md_hz; -md_event_queue_t *md_event_queue = NULL; - -int md_in_upgrade; -int md_keep_repl_state; -int md_devid_destroy; - -/* for sending messages thru a door to userland */ -door_handle_t mdmn_door_handle = NULL; -int mdmn_door_did = -1; - -dev_info_t *md_devinfo = NULL; - -md_mn_nodeid_t md_mn_mynode_id = ~0u; /* My node id (for multi-node sets) */ - -static uint_t md_ocnt[OTYPCNT]; - -static int mdinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int mdattach(dev_info_t *, ddi_attach_cmd_t); -static int mddetach(dev_info_t *, ddi_detach_cmd_t); -static int mdopen(dev_t *, int, int, cred_t *); -static int mdclose(dev_t, int, int, cred_t *); -static int mddump(dev_t, caddr_t, daddr_t, int); -static int mdread(dev_t, struct uio *, cred_t *); -static int mdwrite(dev_t, struct uio *, cred_t *); -static int mdaread(dev_t, struct aio_req *, cred_t *); -static int mdawrite(dev_t, struct aio_req *, cred_t *); -static int mdioctl(dev_t, int, intptr_t, int, cred_t *, int *); -static int mdprop_op(dev_t, dev_info_t *, - ddi_prop_op_t, int, char *, caddr_t, int *); - -static struct cb_ops md_cb_ops = { - mdopen, /* open */ - mdclose, /* close */ - mdstrategy, /* strategy */ - /* print routine -- none yet */ - (int(*)(dev_t, char *))nulldev, - mddump, /* dump */ - mdread, /* read */ - mdwrite, /* write */ - mdioctl, /* ioctl */ - /* devmap */ - (int(*)(dev_t, devmap_cookie_t, offset_t, size_t, size_t *, - uint_t))nodev, - /* mmap */ - (int(*)(dev_t, off_t, int))nodev, - /* segmap */ - (int(*)(dev_t, off_t, struct as *, caddr_t *, off_t, unsigned, - unsigned, unsigned, cred_t *))nodev, - nochpoll, /* poll */ - mdprop_op, /* prop_op */ - 0, /* streamtab */ - (D_64BIT|D_MP|D_NEW), /* driver compatibility flag */ - CB_REV, /* cb_ops version */ - mdaread, /* aread */ - mdawrite, /* awrite */ -}; - -static struct dev_ops md_devops = { - DEVO_REV, /* dev_ops version */ - 0, /* device reference count */ - mdinfo, /* info routine */ - nulldev, /* identify routine */ - nulldev, /* probe - not defined */ - mdattach, /* attach routine */ - mddetach, /* detach routine */ - nodev, /* reset - not defined */ - &md_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev, /* power management */ - ddi_quiesce_not_needed, /* quiesce */ -}; - -/* - * loadable module wrapper - */ -#include - -static struct modldrv modldrv = { - &mod_driverops, /* type of module -- a pseudodriver */ - "Solaris Volume Manager base module", /* name of the module */ - &md_devops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - - -/* md_medd.c */ -extern void med_init(void); -extern void med_fini(void); -extern void md_devid_cleanup(set_t, uint_t); - -/* md_names.c */ -extern struct nm_next_hdr *get_first_record(set_t, int, int); - -int md_maxphys = 0; /* maximum io size in bytes */ -#define MD_MAXBCOUNT (1024 * 1024) -unsigned md_maxbcount = 0; /* maximum physio size in bytes */ - -/* - * Some md ioctls trigger io framework device tree operations. An - * example is md ioctls that call md_resolve_bydevid(): which uses the - * io framework to resolve a devid. Such operations result in acquiring - * io framework locks (like ndi_devi_enter() of "/") while holding - * driver locks (like md_unit_writerlock()). - * - * The prop_op(9E) entry point is called from the devinfo driver with - * an active ndi_devi_enter of "/". To avoid deadlock, md's prop_op - * implementation must avoid taking a lock that is held per above md - * ioctl description: i.e. mdprop_op(9E) can't call md_unit_readerlock() - * without risking deadlock. - * - * To service "size" requests without risking deadlock, we maintain a - * "mnum->nblocks" sizemap (protected by a short-term global mutex). - */ -static kmutex_t md_nblocks_mutex; -static mod_hash_t *md_nblocksmap; /* mnum -> nblocks */ -int md_nblocksmap_size = 512; - -/* - * Maintain "mnum->nblocks" sizemap for mdprop_op use: - * - * Create: any code that establishes a unit's un_total_blocks needs the - * following type of call to establish nblocks for mdprop_op(): - * md_nblocks_set(mnum, un->c.un_total_blocks);" - * NOTE: locate via cscope md_create_minor_node/md_create_unit_incore - * ...or "MD_UNIT..*=" - * - * Change: any code that changes a unit's un_total_blocks needs the - * following type of call to sync nblocks for mdprop_op(): - * md_nblocks_set(mnum, un->c.un_total_blocks);" - * NOTE: locate via cscope for "un_total_blocks[ \t]*=" - * - * Destroy: any code that deletes a unit needs the following type of call - * to sync nblocks for mdprop_op(): - * md_nblocks_set(mnum, -1ULL); - * NOTE: locate via cscope md_remove_minor_node/md_destroy_unit_incore - * ...or "MD_UNIT..*=" - */ -void -md_nblocks_set(minor_t mnum, uint64_t nblocks) -{ - mutex_enter(&md_nblocks_mutex); - if (nblocks == -1ULL) - (void) mod_hash_destroy(md_nblocksmap, - (mod_hash_key_t)(intptr_t)mnum); - else - (void) mod_hash_replace(md_nblocksmap, - (mod_hash_key_t)(intptr_t)mnum, - (mod_hash_val_t)(intptr_t)nblocks); - mutex_exit(&md_nblocks_mutex); -} - -/* get the size of a mnum from "mnum->nblocks" sizemap */ -uint64_t -md_nblocks_get(minor_t mnum) -{ - mod_hash_val_t hv; - - mutex_enter(&md_nblocks_mutex); - if (mod_hash_find(md_nblocksmap, - (mod_hash_key_t)(intptr_t)mnum, &hv) == 0) { - mutex_exit(&md_nblocks_mutex); - return ((uint64_t)(intptr_t)hv); - } - mutex_exit(&md_nblocks_mutex); - return (0); -} - -/* allocate/free dynamic space associated with driver globals */ -void -md_global_alloc_free(int alloc) -{ - set_t s; - - if (alloc) { - /* initialize driver global locks */ - cv_init(&md_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&md_mx, NULL, MUTEX_DEFAULT, NULL); - rw_init(&md_unit_array_rw.lock, NULL, RW_DEFAULT, NULL); - rw_init(&nm_lock.lock, NULL, RW_DEFAULT, NULL); - rw_init(&ni_rwlp.lock, NULL, RW_DRIVER, NULL); - rw_init(&hsp_rwlp.lock, NULL, RW_DRIVER, NULL); - mutex_init(&md_cpr_resync.md_resync_mutex, NULL, - MUTEX_DEFAULT, NULL); - mutex_init(&md_nblocks_mutex, NULL, MUTEX_DEFAULT, NULL); - - /* initialize per set driver global locks */ - for (s = 0; s < MD_MAXSETS; s++) { - /* initialize per set driver globals locks */ - mutex_init(&md_set[s].s_dbmx, - NULL, MUTEX_DEFAULT, NULL); - mutex_init(&md_set_io[s].md_io_mx, - NULL, MUTEX_DEFAULT, NULL); - cv_init(&md_set_io[s].md_io_cv, - NULL, CV_DEFAULT, NULL); - } - } else { - /* destroy per set driver global locks */ - for (s = 0; s < MD_MAXSETS; s++) { - cv_destroy(&md_set_io[s].md_io_cv); - mutex_destroy(&md_set_io[s].md_io_mx); - mutex_destroy(&md_set[s].s_dbmx); - } - - /* destroy driver global locks */ - mutex_destroy(&md_nblocks_mutex); - mutex_destroy(&md_cpr_resync.md_resync_mutex); - rw_destroy(&hsp_rwlp.lock); - rw_destroy(&ni_rwlp.lock); - rw_destroy(&nm_lock.lock); - rw_destroy(&md_unit_array_rw.lock); - mutex_destroy(&md_mx); - cv_destroy(&md_cv); - } -} - -int -_init(void) -{ - set_t s; - int err; - - MD_SET_IN(IN_INIT); - - /* allocate dynamic space associated with driver globals */ - md_global_alloc_free(1); - - /* initialize driver globals */ - md_major = ddi_name_to_major("md"); - md_hz = drv_usectohz(NUM_USEC_IN_SEC); - - /* initialize tunable globals */ - if (md_maxphys == 0) /* maximum io size in bytes */ - md_maxphys = maxphys; - if (md_maxbcount == 0) /* maximum physio size in bytes */ - md_maxbcount = MD_MAXBCOUNT; - - /* initialize per set driver globals */ - for (s = 0; s < MD_MAXSETS; s++) - md_set_io[s].io_state = MD_SET_ACTIVE; - - /* - * NOTE: the framework does not currently guarantee exclusion - * between _init and attach after calling mod_install. - */ - MD_CLR_IN(IN_INIT); - if ((err = mod_install(&modlinkage))) { - MD_SET_IN(IN_INIT); - md_global_alloc_free(0); /* free dynamic space */ - MD_CLR_IN(IN_INIT); - } - return (err); -} - -int -_fini(void) -{ - int err; - - /* - * NOTE: the framework currently does not guarantee exclusion - * with attach until after mod_remove returns 0. - */ - if ((err = mod_remove(&modlinkage))) - return (err); - - MD_SET_IN(IN_FINI); - md_global_alloc_free(0); /* free dynamic space */ - MD_CLR_IN(IN_FINI); - return (err); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -/* ARGSUSED */ -static int -mdattach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - int len; - unit_t i; - size_t sz; - char ver[VERSION_LENGTH]; - char **maj_str_array; - char *str, *str2; - - MD_SET_IN(IN_ATTACH); - md_in_upgrade = 0; - md_keep_repl_state = 0; - md_devid_destroy = 0; - - if (cmd != DDI_ATTACH) { - MD_CLR_IN(IN_ATTACH); - return (DDI_FAILURE); - } - - if (md_devinfo != NULL) { - MD_CLR_IN(IN_ATTACH); - return (DDI_FAILURE); - } - - mddb_init(); - - if (md_start_daemons(TRUE)) { - MD_CLR_IN(IN_ATTACH); - mddb_unload(); /* undo mddb_init() allocations */ - return (DDI_FAILURE); - } - - /* clear the halted state */ - md_clr_status(MD_GBL_HALTED); - - /* see if the diagnostic switch is on */ - if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS, "md_init_debug", 0)) - md_init_debug++; - - /* see if the failfast disable switch is on */ - if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS, "md_ff_disable", 0)) - md_ff_disable++; - - /* try and get the md_nmedh property */ - md_nmedh = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS, "md_nmedh", MED_DEF_HOSTS); - if ((md_nmedh <= 0) || (md_nmedh > MED_MAX_HOSTS)) - md_nmedh = MED_DEF_HOSTS; - - /* try and get the md_med_trans_lst property */ - len = 0; - if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN, - 0, "md_med_trans_lst", NULL, &len) != DDI_PROP_SUCCESS || - len == 0) { - md_med_trans_lst = md_strdup("tcp"); - } else { - md_med_trans_lst = kmem_zalloc((size_t)len, KM_SLEEP); - if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF, - 0, "md_med_trans_lst", md_med_trans_lst, &len) != - DDI_PROP_SUCCESS) { - kmem_free(md_med_trans_lst, (size_t)len); - md_med_trans_lst = md_strdup("tcp"); - } - } - - /* - * Must initialize the internal data structures before the - * any possible calls to 'goto attach_failure' as _fini - * routine references them. - */ - med_init(); - - md_ops = (md_ops_t **)kmem_zalloc( - sizeof (md_ops_t *) * MD_NOPS, KM_SLEEP); - md_mods = (ddi_modhandle_t *)kmem_zalloc( - sizeof (ddi_modhandle_t) * MD_NOPS, KM_SLEEP); - - /* try and get the md_xlate property */ - /* Should we only do this if upgrade? */ - len = sizeof (char) * 5; - if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF, - 0, "md_xlate_ver", ver, &len) == DDI_PROP_SUCCESS) { - if (strcmp(ver, VERSION) == 0) { - len = 0; - if (ddi_prop_op(DDI_DEV_T_ANY, dip, - PROP_LEN_AND_VAL_ALLOC, 0, "md_xlate", - (caddr_t)&md_tuple_table, &len) != - DDI_PROP_SUCCESS) { - if (md_init_debug) - cmn_err(CE_WARN, - "md_xlate ddi_prop_op failed"); - goto attach_failure; - } else { - md_tuple_length = - len/(2 * ((int)sizeof (dev32_t))); - md_in_upgrade = 1; - } - - /* Get target's name to major table */ - if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS, - "md_targ_nm_table", &maj_str_array, - &md_majortab_len) != DDI_PROP_SUCCESS) { - md_majortab_len = 0; - if (md_init_debug) - cmn_err(CE_WARN, "md_targ_nm_table " - "ddi_prop_lookup_string_array " - "failed"); - goto attach_failure; - } - - md_major_tuple_table = - (struct md_xlate_major_table *) - kmem_zalloc(md_majortab_len * - sizeof (struct md_xlate_major_table), KM_SLEEP); - - for (i = 0; i < md_majortab_len; i++) { - /* Getting major name */ - str = strchr(maj_str_array[i], ' '); - if (str == NULL) - continue; - *str = '\0'; - md_major_tuple_table[i].drv_name = - md_strdup(maj_str_array[i]); - - /* Simplified atoi to get major number */ - str2 = str + 1; - md_major_tuple_table[i].targ_maj = 0; - while ((*str2 >= '0') && (*str2 <= '9')) { - md_major_tuple_table[i].targ_maj *= 10; - md_major_tuple_table[i].targ_maj += - *str2++ - '0'; - } - *str = ' '; - } - ddi_prop_free((void *)maj_str_array); - } else { - if (md_init_debug) - cmn_err(CE_WARN, "md_xlate_ver is incorrect"); - goto attach_failure; - } - } - - /* - * Check for properties: - * md_keep_repl_state and md_devid_destroy - * and set globals if these exist. - */ - md_keep_repl_state = ddi_getprop(DDI_DEV_T_ANY, dip, - 0, "md_keep_repl_state", 0); - - md_devid_destroy = ddi_getprop(DDI_DEV_T_ANY, dip, - 0, "md_devid_destroy", 0); - - if (MD_UPGRADE) - md_major_targ = md_targ_name_to_major("md"); - else - md_major_targ = 0; - - /* allocate admin device node */ - if (ddi_create_priv_minor_node(dip, "admin", S_IFCHR, - MD_ADM_MINOR, DDI_PSEUDO, 0, NULL, PRIV_SYS_CONFIG, 0640)) - goto attach_failure; - - if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, - DDI_KERNEL_IOCTL, NULL, 0) != DDI_SUCCESS) - goto attach_failure; - - if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, - "ddi-abrwrite-supported", 1) != DDI_SUCCESS) - goto attach_failure; - - /* these could have been cleared by a detach */ - md_nunits = MD_MAXUNITS; - md_nsets = MD_MAXSETS; - - sz = sizeof (void *) * MD_MAXUNITS; - if (md_set[0].s_un == NULL) - md_set[0].s_un = kmem_zalloc(sz, KM_SLEEP); - if (md_set[0].s_ui == NULL) - md_set[0].s_ui = kmem_zalloc(sz, KM_SLEEP); - - md_devinfo = dip; - - /* - * Only allocate device node for root mirror metadevice. - * Don't pre-allocate unnecessary device nodes (thus slowing down a - * boot when we attach). - * We can't read the mddbs in attach. The mddbs will be read - * by metainit during the boot process when it is doing the - * auto-take processing and any other minor nodes will be - * allocated at that point. - * - * There are two scenarios to be aware of here: - * 1) when we are booting from a mirrored root we need the root - * metadevice to exist very early (during vfs_mountroot processing) - * 2) we need all of the nodes to be created so that any mnttab entries - * will succeed (handled by metainit reading the mddb during boot). - */ - if (strncmp(SVM_PSEUDO_STR, svm_bootpath, sizeof (SVM_PSEUDO_STR) - 1) - == 0) { - char *p; - int mnum = 0; - - /* - * The svm_bootpath string looks something like - * /pseudo/md@0:0,150,blk where 150 is the minor number - * in this example so we need to set the pointer p onto - * the first digit of the minor number and convert it - * from ascii. - */ - for (p = svm_bootpath + sizeof (SVM_PSEUDO_STR) + 1; - *p >= '0' && *p <= '9'; p++) { - mnum *= 10; - mnum += *p - '0'; - } - - if (md_create_minor_node(0, mnum)) { - kmem_free(md_set[0].s_un, sz); - kmem_free(md_set[0].s_ui, sz); - goto attach_failure; - } - } - - /* create the hash to store the meta device sizes */ - md_nblocksmap = mod_hash_create_idhash("md_nblocksmap", - md_nblocksmap_size, mod_hash_null_valdtor); - - MD_CLR_IN(IN_ATTACH); - return (DDI_SUCCESS); - -attach_failure: - /* - * Use our own detach routine to toss any stuff we allocated above. - * NOTE: detach will call md_halt to free the mddb_init allocations. - */ - MD_CLR_IN(IN_ATTACH); - if (mddetach(dip, DDI_DETACH) != DDI_SUCCESS) - cmn_err(CE_WARN, "detach from attach failed"); - return (DDI_FAILURE); -} - -/* ARGSUSED */ -static int -mddetach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - extern int check_active_locators(); - set_t s; - size_t sz; - int len; - - MD_SET_IN(IN_DETACH); - - /* check command */ - if (cmd != DDI_DETACH) { - MD_CLR_IN(IN_DETACH); - return (DDI_FAILURE); - } - - /* - * if we have not already halted yet we have no active config - * then automatically initiate a halt so we can detach. - */ - if (!(md_get_status() & MD_GBL_HALTED)) { - if (check_active_locators() == 0) { - /* - * NOTE: a successful md_halt will have done the - * mddb_unload to free allocations done in mddb_init - */ - if (md_halt(MD_NO_GBL_LOCKS_HELD)) { - cmn_err(CE_NOTE, "md:detach: " - "Could not halt Solaris Volume Manager"); - MD_CLR_IN(IN_DETACH); - return (DDI_FAILURE); - } - } - - /* fail detach if we have not halted */ - if (!(md_get_status() & MD_GBL_HALTED)) { - MD_CLR_IN(IN_DETACH); - return (DDI_FAILURE); - } - } - - /* must be in halted state, this will be cleared on next attach */ - ASSERT(md_get_status() & MD_GBL_HALTED); - - /* cleanup attach allocations and initializations */ - md_major_targ = 0; - - sz = sizeof (void *) * md_nunits; - for (s = 0; s < md_nsets; s++) { - if (md_set[s].s_un != NULL) { - kmem_free(md_set[s].s_un, sz); - md_set[s].s_un = NULL; - } - - if (md_set[s].s_ui != NULL) { - kmem_free(md_set[s].s_ui, sz); - md_set[s].s_ui = NULL; - } - } - md_nunits = 0; - md_nsets = 0; - md_nmedh = 0; - - if (non_ff_drivers != NULL) { - int i; - - for (i = 0; non_ff_drivers[i] != NULL; i++) - kmem_free(non_ff_drivers[i], - strlen(non_ff_drivers[i]) + 1); - - /* free i+1 entries because there is a null entry at list end */ - kmem_free(non_ff_drivers, (i + 1) * sizeof (char *)); - non_ff_drivers = NULL; - } - - if (md_med_trans_lst != NULL) { - kmem_free(md_med_trans_lst, strlen(md_med_trans_lst) + 1); - md_med_trans_lst = NULL; - } - - if (md_mods != NULL) { - kmem_free(md_mods, sizeof (ddi_modhandle_t) * MD_NOPS); - md_mods = NULL; - } - - if (md_ops != NULL) { - kmem_free(md_ops, sizeof (md_ops_t *) * MD_NOPS); - md_ops = NULL; - } - - if (MD_UPGRADE) { - len = md_tuple_length * (2 * ((int)sizeof (dev32_t))); - md_in_upgrade = 0; - md_xlate_free(len); - md_majortab_free(); - } - - /* - * Undo what we did in mdattach, freeing resources - * and removing things we installed. The system - * framework guarantees we are not active with this devinfo - * node in any other entry points at this time. - */ - ddi_prop_remove_all(dip); - ddi_remove_minor_node(dip, NULL); - - med_fini(); - - mod_hash_destroy_idhash(md_nblocksmap); - - md_devinfo = NULL; - - MD_CLR_IN(IN_DETACH); - return (DDI_SUCCESS); -} - - -/* - * Given the device number return the devinfo pointer - * given to md via md_attach - */ -/*ARGSUSED*/ -static int -mdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error = DDI_FAILURE; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - if (md_devinfo) { - *result = (void *)md_devinfo; - error = DDI_SUCCESS; - } - break; - - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - } - return (error); -} - -/* - * property operation routine. return the number of blocks for the partition - * in question or forward the request to the property facilities. - */ -static int -mdprop_op( - dev_t dev, /* device number associated with device */ - dev_info_t *dip, /* device info struct for this device */ - ddi_prop_op_t prop_op, /* property operator */ - int mod_flags, /* property flags */ - char *name, /* name of property */ - caddr_t valuep, /* where to put property value */ - int *lengthp) /* put length of property here */ -{ - return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, - name, valuep, lengthp, md_nblocks_get(getminor(dev)))); -} - -static void -snarf_user_data(set_t setno) -{ - mddb_recid_t recid; - mddb_recstatus_t status; - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, MDDB_USER, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - status = mddb_getrecstatus(recid); - if (status == MDDB_STALE) - continue; - - if (status == MDDB_NODATA) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - - ASSERT(status == MDDB_OK); - - mddb_setrecprivate(recid, MD_PRV_GOTIT); - } -} - -static void -md_print_block_usage(mddb_set_t *s, uint_t blks) -{ - uint_t ib; - int li; - mddb_mb_ic_t *mbip; - uint_t max_blk_needed; - mddb_lb_t *lbp; - mddb_sidelocator_t *slp; - int drv_index; - md_splitname sn; - char *name; - char *suffix; - size_t prefixlen; - size_t suffixlen; - int alloc_sz; - - - max_blk_needed = s->s_totalblkcnt - s->s_freeblkcnt + blks; - - cmn_err(CE_WARN, "Blocks in Metadevice State Database: %d\n" - " Additional Blocks Needed: %d\n\n" - " Increase size of following replicas for\n" - " device relocatability by deleting listed\n" - " replica and re-adding replica with\n" - " increased size (see metadb(1M)):\n" - " Replica Increase By", - s->s_totalblkcnt, (blks - s->s_freeblkcnt)); - - lbp = s->s_lbp; - - for (li = 0; li < lbp->lb_loccnt; li++) { - if (lbp->lb_locators[li].l_flags & MDDB_F_DELETED) - continue; - ib = 0; - for (mbip = s->s_mbiarray[li]; mbip != NULL; - mbip = mbip->mbi_next) { - ib += (uint_t)mbip->mbi_mddb_mb.mb_blkcnt; - } - if (ib == 0) - continue; - if (ib < max_blk_needed) { - slp = &lbp->lb_sidelocators[s->s_sideno][li]; - drv_index = slp->l_drvnm_index; - mddb_locatorblock2splitname(s->s_lnp, li, s->s_sideno, - &sn); - prefixlen = SPN_PREFIX(&sn).pre_len; - suffixlen = SPN_SUFFIX(&sn).suf_len; - alloc_sz = (int)(prefixlen + suffixlen + 2); - name = (char *)kmem_alloc(alloc_sz, KM_SLEEP); - (void) strncpy(name, SPN_PREFIX(&sn).pre_data, - prefixlen); - name[prefixlen] = '/'; - suffix = name + (prefixlen + 1); - (void) strncpy(suffix, SPN_SUFFIX(&sn).suf_data, - suffixlen); - name[prefixlen + suffixlen + 1] = '\0'; - cmn_err(CE_WARN, - " %s (%s:%d:%d) %d blocks", - name, lbp->lb_drvnm[drv_index].dn_data, - slp->l_mnum, lbp->lb_locators[li].l_blkno, - (max_blk_needed - ib)); - kmem_free(name, alloc_sz); - } - } -} - -/* - * md_create_minor_node: - * Create the minor device for the given set and un_self_id. - * - * Input: - * setno - set number - * mnum - selfID of unit - * - * Output: - * None. - * - * Returns 0 for success, 1 for failure. - * - * Side-effects: - * None. - */ -int -md_create_minor_node(set_t setno, minor_t mnum) -{ - char name[20]; - - /* Check for valid arguments */ - if (setno >= MD_MAXSETS || MD_MIN2UNIT(mnum) >= MD_MAXUNITS) - return (1); - - (void) snprintf(name, 20, "%u,%u,blk", - (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum)); - - if (ddi_create_minor_node(md_devinfo, name, S_IFBLK, - MD_MKMIN(setno, mnum), DDI_PSEUDO, 0)) - return (1); - - (void) snprintf(name, 20, "%u,%u,raw", - (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum)); - - if (ddi_create_minor_node(md_devinfo, name, S_IFCHR, - MD_MKMIN(setno, mnum), DDI_PSEUDO, 0)) - return (1); - - return (0); -} - -/* - * For a given key check if it is an orphaned record. - * The following conditions are used to determine an orphan. - * 1. The device associated with that key is not a metadevice. - * 2. If DEVID_STYLE then the physical device does not have a device Id - * associated with it. - * - * If a key does not have an entry in the devid namespace it could be - * a device that does not support device ids. Hence the record is not - * deleted. - */ - -static int -md_verify_orphaned_record(set_t setno, mdkey_t key) -{ - md_dev64_t odev; /* orphaned dev */ - mddb_set_t *s; - side_t side = 0; - struct nm_next_hdr *did_nh = NULL; - - s = (mddb_set_t *)md_set[setno].s_db; - if ((did_nh = get_first_record(setno, 1, (NM_DEVID | NM_NOTSHARED))) - == NULL) - return (0); - /* - * If devid style is set then get the dev_t using MD_NOTRUST_DEVT - */ - if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE) { - odev = md_getdevnum(setno, side, key, MD_NOTRUST_DEVT); - if ((odev == NODEV64) || (md_getmajor(odev) == md_major)) - return (0); - if (lookup_entry(did_nh, setno, side, key, odev, NM_DEVID) == - NULL) - return (1); - } - return (0); -} - -int -md_snarf_db_set(set_t setno, md_error_t *ep) -{ - int err = 0; - int i; - mddb_recid_t recid; - mddb_type_t drvrid; - mddb_recstatus_t status; - md_ops_t *ops; - uint_t privat; - mddb_set_t *s; - uint_t cvt_blks; - struct nm_next_hdr *nh; - mdkey_t key = MD_KEYWILD; - side_t side = 0; - int size; - int devid_flag; - int retval; - uint_t un; - int un_next_set = 0; - - md_haltsnarf_enter(setno); - - mutex_enter(&md_mx); - if (md_set[setno].s_status & MD_SET_SNARFED) { - mutex_exit(&md_mx); - md_haltsnarf_exit(setno); - return (0); - } - mutex_exit(&md_mx); - - if (! (md_get_status() & MD_GBL_DAEMONS_LIVE)) { - if (md_start_daemons(TRUE)) { - if (ep != NULL) - (void) mdsyserror(ep, ENXIO); - err = -1; - goto out; - } - } - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (!md_load_namespace(setno, ep, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - err = -1; - goto out; - } - - /* - * If replica is in non-devid state, convert if: - * - not in probe during upgrade (md_keep_repl_state = 0) - * - enough space available in replica - * - local set - * - not a multi-node diskset - * - clustering is not present (for non-local set) - */ - s = (mddb_set_t *)md_set[setno].s_db; - devid_flag = 0; - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE) && !md_keep_repl_state) - devid_flag = 1; - if (cluster_bootflags & CLUSTER_CONFIGURED) - if (setno != MD_LOCAL_SET) - devid_flag = 0; - if (MD_MNSET_SETNO(setno)) - devid_flag = 0; - if ((md_devid_destroy == 1) && (md_keep_repl_state == 1)) - devid_flag = 0; - - /* - * if we weren't devid style before and md_keep_repl_state=1 - * we need to stay non-devid - */ - if ((md_keep_repl_state == 1) && - ((s->s_lbp->lb_flags & MDDB_DEVID_STYLE) == 0)) - devid_flag = 0; - if (devid_flag) { - /* - * Determine number of free blocks needed to convert - * entire replica to device id format - locator blocks - * and namespace. - */ - cvt_blks = 0; - if (mddb_lb_did_convert(s, 0, &cvt_blks) != 0) { - if (ep != NULL) - (void) mdsyserror(ep, EIO); - err = -1; - goto out; - - } - cvt_blks += md_nm_did_chkspace(setno); - - /* add MDDB_DEVID_CONV_PERC% */ - if ((md_conv_perc > 0) && (md_conv_perc <= 100)) { - cvt_blks = cvt_blks * (100 + md_conv_perc) / 100; - } - - if (cvt_blks <= s->s_freeblkcnt) { - if (mddb_lb_did_convert(s, 1, &cvt_blks) != 0) { - if (ep != NULL) - (void) mdsyserror(ep, EIO); - err = -1; - goto out; - } - - } else { - /* - * Print message that replica can't be converted for - * lack of space. No failure - just continue to - * run without device ids. - */ - cmn_err(CE_WARN, - "Unable to add Solaris Volume Manager device " - "relocation data.\n" - " To use device relocation feature:\n" - " - Increase size of listed replicas\n" - " - Reboot"); - md_print_block_usage(s, cvt_blks); - cmn_err(CE_WARN, - "Loading set without device relocation data.\n" - " Solaris Volume Manager disk movement " - "not tracked in local set."); - } - } - - /* - * go through and load any modules referenced in - * data base - */ - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { - status = mddb_getrecstatus(recid); - if (status == MDDB_STALE) { - if (! (md_get_setstatus(setno) & MD_SET_STALE)) { - md_set_setstatus(setno, MD_SET_STALE); - cmn_err(CE_WARN, - "md: state database is stale"); - } - } else if (status == MDDB_NODATA) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - drvrid = mddb_getrectype1(recid); - if (drvrid < MDDB_FIRST_MODID) - continue; - if (md_loadsubmod(setno, md_getshared_name(setno, drvrid), - drvrid) < 0) { - cmn_err(CE_NOTE, "md: could not load misc/%s", - md_getshared_name(setno, drvrid)); - } - } - - if (recid < 0) - goto out; - - snarf_user_data(setno); - - /* - * Initialize the md_nm_snarfed array - * this array is indexed by the key and - * is set by md_getdevnum during the snarf time - */ - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) != NULL) { - size = (int)((((struct nm_rec_hdr *)nh->nmn_record)-> - r_next_key) * (sizeof (int))); - md_nm_snarfed = (int *)kmem_zalloc(size, KM_SLEEP); - } - - /* - * go through and snarf until nothing gets added - */ - do { - i = 0; - for (ops = md_opslist; ops != NULL; ops = ops->md_next) { - if (ops->md_snarf != NULL) { - retval = ops->md_snarf(MD_SNARF_DOIT, setno); - if (retval == -1) { - err = -1; - /* Don't know the failed unit */ - (void) mdmderror(ep, MDE_RR_ALLOC_ERROR, - 0); - (void) md_halt_set(setno, MD_HALT_ALL); - (void) mddb_unload_set(setno); - md_haltsnarf_exit(setno); - return (err); - } else { - i += retval; - } - } - } - } while (i); - - /* - * Set the first available slot and availability - */ - md_set[setno].s_un_avail = 0; - for (un = 0; un < MD_MAXUNITS; un++) { - if (md_set[setno].s_un[un] != NULL) { - continue; - } else { - if (!un_next_set) { - md_set[setno].s_un_next = un; - un_next_set = 1; - } - md_set[setno].s_un_avail++; - } - } - - md_set_setstatus(setno, MD_SET_SNARFED); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { - privat = mddb_getrecprivate(recid); - if (privat & MD_PRV_COMMIT) { - if (mddb_commitrec(recid)) { - if (!(md_get_setstatus(setno) & MD_SET_STALE)) { - md_set_setstatus(setno, MD_SET_STALE); - cmn_err(CE_WARN, - "md: state database is stale"); - } - } - mddb_setrecprivate(recid, MD_PRV_GOTIT); - } - } - - /* Deletes must happen after all the commits */ - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { - privat = mddb_getrecprivate(recid); - if (privat & MD_PRV_DELETE) { - if (mddb_deleterec(recid)) { - if (!(md_get_setstatus(setno) & MD_SET_STALE)) { - md_set_setstatus(setno, MD_SET_STALE); - cmn_err(CE_WARN, - "md: state database is stale"); - } - mddb_setrecprivate(recid, MD_PRV_GOTIT); - } - recid = mddb_makerecid(setno, 0); - } - } - - /* - * go through and clean up records until nothing gets cleaned up. - */ - do { - i = 0; - for (ops = md_opslist; ops != NULL; ops = ops->md_next) - if (ops->md_snarf != NULL) - i += ops->md_snarf(MD_SNARF_CLEANUP, setno); - } while (i); - - if (md_nm_snarfed != NULL && - !(md_get_setstatus(setno) & MD_SET_STALE)) { - /* - * go thru and cleanup the namespace and the device id - * name space - */ - for (key = 1; - key < ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key; - key++) { - /* - * Is the entry an 'orphan'? - */ - if (lookup_entry(nh, setno, side, key, NODEV64, 0L) != - NULL) { - /* - * If the value is not set then apparently - * it is not part of the current configuration, - * remove it this can happen when system panic - * between the primary name space update and - * the device id name space update - */ - if (md_nm_snarfed[key] == 0) { - if (md_verify_orphaned_record(setno, - key) == 1) - (void) remove_entry(nh, - side, key, 0L); - } - } - } - } - - if (md_nm_snarfed != NULL) { - /* - * Done and free the memory - */ - kmem_free(md_nm_snarfed, size); - md_nm_snarfed = NULL; - } - - if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE && - !(md_get_setstatus(setno) & MD_SET_STALE)) { - /* - * if the destroy flag has been set and - * the MD_SET_DIDCLUP bit is not set in - * the set's status field, cleanup the - * entire device id namespace - */ - if (md_devid_destroy && - !(md_get_setstatus(setno) & MD_SET_DIDCLUP)) { - (void) md_devid_cleanup(setno, 1); - md_set_setstatus(setno, MD_SET_DIDCLUP); - } else - (void) md_devid_cleanup(setno, 0); - } - - /* - * clear single threading on snarf, return success or error - */ -out: - md_haltsnarf_exit(setno); - return (err); -} - -void -get_minfo(struct dk_minfo *info, minor_t mnum) -{ - md_unit_t *un; - mdi_unit_t *ui; - - info->dki_capacity = 0; - info->dki_lbsize = 0; - info->dki_media_type = 0; - - if ((ui = MDI_UNIT(mnum)) == NULL) { - return; - } - un = (md_unit_t *)md_unit_readerlock(ui); - info->dki_capacity = un->c.un_total_blocks; - md_unit_readerexit(ui); - info->dki_lbsize = DEV_BSIZE; - info->dki_media_type = DK_UNKNOWN; -} - - -void -get_info(struct dk_cinfo *info, minor_t mnum) -{ - /* - * Controller Information - */ - info->dki_ctype = DKC_MD; - info->dki_cnum = ddi_get_instance(ddi_get_parent(md_devinfo)); - (void) strcpy(info->dki_cname, - ddi_get_name(ddi_get_parent(md_devinfo))); - /* - * Unit Information - */ - info->dki_unit = mnum; - info->dki_slave = 0; - (void) strcpy(info->dki_dname, ddi_driver_name(md_devinfo)); - info->dki_flags = 0; - info->dki_partition = 0; - info->dki_maxtransfer = (ushort_t)(md_maxphys / DEV_BSIZE); - - /* - * We can't get from here to there yet - */ - info->dki_addr = 0; - info->dki_space = 0; - info->dki_prio = 0; - info->dki_vec = 0; -} - -/* - * open admin device - */ -static int -mdadminopen( - int flag, - int otyp) -{ - int err = 0; - - /* single thread */ - mutex_enter(&md_mx); - - /* check type and flags */ - if ((otyp != OTYP_CHR) && (otyp != OTYP_LYR)) { - err = EINVAL; - goto out; - } - if (((flag & FEXCL) && (md_status & MD_GBL_OPEN)) || - (md_status & MD_GBL_EXCL)) { - err = EBUSY; - goto out; - } - - /* count and flag open */ - md_ocnt[otyp]++; - md_status |= MD_GBL_OPEN; - if (flag & FEXCL) - md_status |= MD_GBL_EXCL; - - /* unlock return success */ -out: - mutex_exit(&md_mx); - return (err); -} - -/* - * open entry point - */ -static int -mdopen( - dev_t *dev, - int flag, - int otyp, - cred_t *cred_p) -{ - minor_t mnum = getminor(*dev); - unit_t unit = MD_MIN2UNIT(mnum); - set_t setno = MD_MIN2SET(mnum); - mdi_unit_t *ui = NULL; - int err = 0; - md_parent_t parent; - - /* dispatch admin device opens */ - if (mnum == MD_ADM_MINOR) - return (mdadminopen(flag, otyp)); - - /* lock, check status */ - rw_enter(&md_unit_array_rw.lock, RW_READER); - -tryagain: - if (md_get_status() & MD_GBL_HALTED) { - err = ENODEV; - goto out; - } - - /* check minor */ - if ((setno >= md_nsets) || (unit >= md_nunits)) { - err = ENXIO; - goto out; - } - - /* make sure we're snarfed */ - if ((md_get_setstatus(MD_LOCAL_SET) & MD_SET_SNARFED) == 0) { - if (md_snarf_db_set(MD_LOCAL_SET, NULL) != 0) { - err = ENODEV; - goto out; - } - } - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) { - err = ENODEV; - goto out; - } - - /* check unit */ - if ((ui = MDI_UNIT(mnum)) == NULL) { - err = ENXIO; - goto out; - } - - /* - * The softpart open routine may do an I/O during the open, in - * which case the open routine will set the OPENINPROGRESS flag - * and drop all locks during the I/O. If this thread sees - * the OPENINPROGRESS flag set, if should wait until the flag - * is reset before calling the driver's open routine. It must - * also revalidate the world after it grabs the unit_array lock - * since the set may have been released or the metadevice cleared - * during the sleep. - */ - if (MD_MNSET_SETNO(setno)) { - mutex_enter(&ui->ui_mx); - if (ui->ui_lock & MD_UL_OPENINPROGRESS) { - rw_exit(&md_unit_array_rw.lock); - cv_wait(&ui->ui_cv, &ui->ui_mx); - rw_enter(&md_unit_array_rw.lock, RW_READER); - mutex_exit(&ui->ui_mx); - goto tryagain; - } - mutex_exit(&ui->ui_mx); - } - - /* Test if device is openable */ - if ((ui->ui_tstate & MD_NOTOPENABLE) != 0) { - err = ENXIO; - goto out; - } - - /* don't allow opens w/WRITE flag if stale */ - if ((flag & FWRITE) && (md_get_setstatus(setno) & MD_SET_STALE)) { - err = EROFS; - goto out; - } - - /* don't allow writes to subdevices */ - parent = md_get_parent(md_expldev(*dev)); - if ((flag & FWRITE) && MD_HAS_PARENT(parent)) { - err = EROFS; - goto out; - } - - /* open underlying driver */ - if (md_ops[ui->ui_opsindex]->md_open != NULL) { - if ((err = (*md_ops[ui->ui_opsindex]->md_open) - (dev, flag, otyp, cred_p, 0)) != 0) - goto out; - } - - /* or do it ourselves */ - else { - /* single thread */ - (void) md_unit_openclose_enter(ui); - err = md_unit_incopen(mnum, flag, otyp); - md_unit_openclose_exit(ui); - if (err != 0) - goto out; - } - - /* unlock, return status */ -out: - rw_exit(&md_unit_array_rw.lock); - return (err); -} - -/* - * close admin device - */ -static int -mdadminclose( - int otyp) -{ - int i; - int err = 0; - - /* single thread */ - mutex_enter(&md_mx); - - /* check type and flags */ - if ((otyp < 0) || (otyp >= OTYPCNT)) { - err = EINVAL; - goto out; - } else if (md_ocnt[otyp] == 0) { - err = ENXIO; - goto out; - } - - /* count and flag closed */ - if (otyp == OTYP_LYR) - md_ocnt[otyp]--; - else - md_ocnt[otyp] = 0; - md_status &= ~MD_GBL_OPEN; - for (i = 0; (i < OTYPCNT); ++i) - if (md_ocnt[i] != 0) - md_status |= MD_GBL_OPEN; - if (! (md_status & MD_GBL_OPEN)) - md_status &= ~MD_GBL_EXCL; - - /* unlock return success */ -out: - mutex_exit(&md_mx); - return (err); -} - -/* - * close entry point - */ -static int -mdclose( - dev_t dev, - int flag, - int otyp, - cred_t *cred_p) -{ - minor_t mnum = getminor(dev); - set_t setno = MD_MIN2SET(mnum); - unit_t unit = MD_MIN2UNIT(mnum); - mdi_unit_t *ui = NULL; - int err = 0; - - /* dispatch admin device closes */ - if (mnum == MD_ADM_MINOR) - return (mdadminclose(otyp)); - - /* check minor */ - if ((setno >= md_nsets) || (unit >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) { - err = ENXIO; - goto out; - } - - /* close underlying driver */ - if (md_ops[ui->ui_opsindex]->md_close != NULL) { - if ((err = (*md_ops[ui->ui_opsindex]->md_close) - (dev, flag, otyp, cred_p, 0)) != 0) - goto out; - } - - /* or do it ourselves */ - else { - /* single thread */ - (void) md_unit_openclose_enter(ui); - err = md_unit_decopen(mnum, otyp); - md_unit_openclose_exit(ui); - if (err != 0) - goto out; - } - - /* return success */ -out: - return (err); -} - - -/* - * This routine performs raw read operations. It is called from the - * device switch at normal priority. - * - * The main catch is that the *uio struct which is passed to us may - * specify a read which spans two buffers, which would be contiguous - * on a single partition, but not on a striped partition. This will - * be handled by mdstrategy. - */ -/*ARGSUSED*/ -static int -mdread(dev_t dev, struct uio *uio, cred_t *credp) -{ - minor_t mnum; - mdi_unit_t *ui; - int error; - - if (((mnum = getminor(dev)) == MD_ADM_MINOR) || - (MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) - return (ENXIO); - - if (md_ops[ui->ui_opsindex]->md_read != NULL) - return ((*md_ops[ui->ui_opsindex]->md_read) - (dev, uio, credp)); - - if ((error = md_chk_uio(uio)) != 0) - return (error); - - return (physio(mdstrategy, NULL, dev, B_READ, md_minphys, uio)); -} - -/* - * This routine performs async raw read operations. It is called from the - * device switch at normal priority. - * - * The main catch is that the *aio struct which is passed to us may - * specify a read which spans two buffers, which would be contiguous - * on a single partition, but not on a striped partition. This will - * be handled by mdstrategy. - */ -/*ARGSUSED*/ -static int -mdaread(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - minor_t mnum; - mdi_unit_t *ui; - int error; - - - if (((mnum = getminor(dev)) == MD_ADM_MINOR) || - (MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) - return (ENXIO); - - if (md_ops[ui->ui_opsindex]->md_aread != NULL) - return ((*md_ops[ui->ui_opsindex]->md_aread) - (dev, aio, credp)); - - if ((error = md_chk_uio(aio->aio_uio)) != 0) - return (error); - - return (aphysio(mdstrategy, anocancel, dev, B_READ, md_minphys, aio)); -} - -/* - * This routine performs raw write operations. It is called from the - * device switch at normal priority. - * - * The main catch is that the *uio struct which is passed to us may - * specify a write which spans two buffers, which would be contiguous - * on a single partition, but not on a striped partition. This is - * handled by mdstrategy. - * - */ -/*ARGSUSED*/ -static int -mdwrite(dev_t dev, struct uio *uio, cred_t *credp) -{ - minor_t mnum; - mdi_unit_t *ui; - int error; - - if (((mnum = getminor(dev)) == MD_ADM_MINOR) || - (MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) - return (ENXIO); - - if (md_ops[ui->ui_opsindex]->md_write != NULL) - return ((*md_ops[ui->ui_opsindex]->md_write) - (dev, uio, credp)); - - if ((error = md_chk_uio(uio)) != 0) - return (error); - - return (physio(mdstrategy, NULL, dev, B_WRITE, md_minphys, uio)); -} - -/* - * This routine performs async raw write operations. It is called from the - * device switch at normal priority. - * - * The main catch is that the *aio struct which is passed to us may - * specify a write which spans two buffers, which would be contiguous - * on a single partition, but not on a striped partition. This is - * handled by mdstrategy. - * - */ -/*ARGSUSED*/ -static int -mdawrite(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - minor_t mnum; - mdi_unit_t *ui; - int error; - - - if (((mnum = getminor(dev)) == MD_ADM_MINOR) || - (MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) - return (ENXIO); - - if (md_ops[ui->ui_opsindex]->md_awrite != NULL) - return ((*md_ops[ui->ui_opsindex]->md_awrite) - (dev, aio, credp)); - - if ((error = md_chk_uio(aio->aio_uio)) != 0) - return (error); - - return (aphysio(mdstrategy, anocancel, dev, B_WRITE, md_minphys, aio)); -} - -int -mdstrategy(struct buf *bp) -{ - minor_t mnum; - mdi_unit_t *ui; - - ASSERT((bp->b_flags & B_DONE) == 0); - - if (panicstr) - md_clr_status(MD_GBL_DAEMONS_LIVE); - - if (((mnum = getminor(bp->b_edev)) == MD_ADM_MINOR) || - (MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) { - bp->b_flags |= B_ERROR; - bp->b_error = ENXIO; - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } - - bp->b_flags &= ~(B_ERROR | B_DONE); - if (md_ops[ui->ui_opsindex]->md_strategy != NULL) { - (*md_ops[ui->ui_opsindex]->md_strategy) (bp, 0, NULL); - } else { - (void) errdone(ui, bp, ENXIO); - } - return (0); -} - -/* - * Return true if the ioctl is allowed to be multithreaded. - * All the ioctls with MN are sent only from the message handlers through - * rpc.mdcommd, which (via it's own locking mechanism) takes care that not two - * ioctl for the same metadevice are issued at the same time. - * So we are safe here. - * The other ioctls do not mess with any metadevice structures and therefor - * are harmless too, if called multiple times at the same time. - */ -static boolean_t -is_mt_ioctl(int cmd) { - - switch (cmd) { - case MD_IOCGUNIQMSGID: - case MD_IOCGVERSION: - case MD_IOCISOPEN: - case MD_MN_SET_MM_OWNER: - case MD_MN_SET_STATE: - case MD_MN_SUSPEND_WRITES: - case MD_MN_ALLOCATE_HOTSPARE: - case MD_MN_SET_SETFLAGS: - case MD_MN_GET_SETFLAGS: - case MD_MN_MDDB_OPTRECFIX: - case MD_MN_MDDB_PARSE: - case MD_MN_MDDB_BLOCK: - case MD_MN_DB_USERREQ: - case MD_IOC_SPSTATUS: - case MD_MN_COMMD_ERR: - case MD_MN_SET_COMMD_RUNNING: - case MD_MN_RESYNC: - case MD_MN_SETSYNC: - case MD_MN_POKE_HOTSPARES: - case MD_MN_RR_DIRTY: - case MD_MN_RR_CLEAN: - case MD_MN_IOC_SPUPDATEWM: - return (1); - default: - return (0); - } -} - -/* - * This routine implements the ioctl calls for the Virtual Disk System. - * It is called from the device switch at normal priority. - */ -/* ARGSUSED */ -static int -mdioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cred_p, - int *rval_p) -{ - minor_t mnum = getminor(dev); - mdi_unit_t *ui; - IOLOCK lock; - int err; - - /* - * For multinode disksets number of ioctls are allowed to be - * multithreaded. - * A fundamental assumption made in this implementation is that - * ioctls either do not interact with other md structures or the - * ioctl to the admin device can only occur if the metadevice - * device is open. i.e. avoid a race between metaclear and the - * progress of a multithreaded ioctl. - */ - - if (!is_mt_ioctl(cmd) && md_ioctl_lock_enter() == EINTR) { - return (EINTR); - } - - /* - * initialize lock tracker - */ - IOLOCK_INIT(&lock); - - /* Flag to indicate that MD_GBL_IOCTL_LOCK is not acquired */ - - if (is_mt_ioctl(cmd)) { - /* increment the md_mtioctl_cnt */ - mutex_enter(&md_mx); - md_mtioctl_cnt++; - mutex_exit(&md_mx); - lock.l_flags |= MD_MT_IOCTL; - } - - /* - * this has been added to prevent notification from re-snarfing - * so metaunload will work. It may interfere with other modules - * halt process. - */ - if (md_get_status() & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) - return (IOLOCK_RETURN(ENXIO, &lock)); - - /* - * admin device ioctls - */ - if (mnum == MD_ADM_MINOR) { - err = md_admin_ioctl(md_expldev(dev), cmd, (void *) data, - mode, &lock); - } - - /* - * metadevice ioctls - */ - else if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - (md_set[MD_MIN2SET(mnum)].s_ui == NULL) || - ((ui = MDI_UNIT(mnum)) == NULL)) { - err = ENXIO; - } else if (md_ops[ui->ui_opsindex]->md_ioctl == NULL) { - err = ENOTTY; - } else { - err = (*md_ops[ui->ui_opsindex]->md_ioctl) - (dev, cmd, (void *) data, mode, &lock); - } - - /* - * drop any locks we grabbed - */ - return (IOLOCK_RETURN_IOCTLEND(err, &lock)); -} - -static int -mddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) -{ - minor_t mnum; - set_t setno; - mdi_unit_t *ui; - - if ((mnum = getminor(dev)) == MD_ADM_MINOR) - return (ENXIO); - - setno = MD_MIN2SET(mnum); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL)) - return (ENXIO); - - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) - return (ENXIO); - - if (md_ops[ui->ui_opsindex]->md_dump != NULL) - return ((*md_ops[ui->ui_opsindex]->md_dump) - (dev, addr, blkno, nblk)); - - return (ENXIO); -} - -/* - * Metadevice unit number dispatcher - * When this routine is called it will scan the - * incore unit array and return the avail slot - * hence the unit number to the caller - * - * Return -1 if there is nothing available - */ -unit_t -md_get_nextunit(set_t setno) -{ - unit_t un, start; - - /* - * If nothing available - */ - if (md_set[setno].s_un_avail == 0) { - return (MD_UNITBAD); - } - - mutex_enter(&md_mx); - start = un = md_set[setno].s_un_next; - - /* LINTED: E_CONSTANT_CONDITION */ - while (1) { - if (md_set[setno].s_un[un] == NULL) { - /* - * Advance the starting index for the next - * md_get_nextunit call - */ - if (un == MD_MAXUNITS - 1) { - md_set[setno].s_un_next = 0; - } else { - md_set[setno].s_un_next = un + 1; - } - break; - } - - un = ((un == MD_MAXUNITS - 1) ? 0 : un + 1); - - if (un == start) { - un = MD_UNITBAD; - break; - } - - } - - mutex_exit(&md_mx); - return (un); -} diff --git a/usr/src/uts/common/io/lvm/md/md.conf b/usr/src/uts/common/io/lvm/md/md.conf deleted file mode 100644 index d6ea8f5c215d..000000000000 --- a/usr/src/uts/common/io/lvm/md/md.conf +++ /dev/null @@ -1,30 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# The parameters nmd and md_nsets are obsolete. The values for these -# parameters no longer have any meaning. -name="md" parent="pseudo" nmd=128 md_nsets=4; diff --git a/usr/src/uts/common/io/lvm/md/md_error.c b/usr/src/uts/common/io/lvm/md/md_error.c deleted file mode 100644 index f3336afad8ba..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_error.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 1994, 1999, 2000-2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * error functions - */ -#include -#include -#include -#include - -/* - * null error constant - */ -const md_error_t mdnullerror = {{MDEC_VOID}, NULL, NULL, NULL}; - -/* - * clear error - */ -void -mdclrerror( - md_error_t *ep -) -{ - bzero((caddr_t)ep, sizeof (*ep)); -} - -/* - * steal (copy) an error code safely - */ -int -mdstealerror( - md_error_t *to, - md_error_t *from -) -{ - mdclrerror(to); - *to = *from; - (void) bzero((caddr_t)from, sizeof (*from)); - return (0); -} - -/* - * simple error - */ -int -mderror( - md_error_t *ep, - md_void_errno_t errnum -) -{ - md_void_error_t *ip = &ep->info.md_error_info_t_u.void_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_VOID; - ip->errnum = errnum; - - return (0); -} - -/* - * system error - */ -int -mdsyserror( - md_error_t *ep, - int errnum -) -{ - md_sys_error_t *ip = &ep->info.md_error_info_t_u.sys_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_SYS; - ip->errnum = errnum; - - return (0); -} - -/* - * device error - */ -int -mddeverror( - md_error_t *ep, - md_dev_errno_t errnum, - md_dev64_t dev -) -{ - md_dev_error_t *ip = &ep->info.md_error_info_t_u.dev_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_DEV; - ip->errnum = errnum; - ip->dev = (md_dev64_t)dev; - - return (0); -} - -/* - * metadevice error - */ -int -mdmderror( - md_error_t *ep, - md_md_errno_t errnum, - minor_t mnum -) -{ - md_md_error_t *ip = &ep->info.md_error_info_t_u.md_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_MD; - ip->errnum = errnum; - ip->mnum = mnum; - - return (0); -} - -/* - * component error - */ -int -mdcomperror( - md_error_t *ep, - md_comp_errno_t errnum, - minor_t mnum, - md_dev64_t dev -) -{ - md_comp_error_t *ip = &ep->info.md_error_info_t_u.comp_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_COMP; - ip->errnum = errnum; - ip->comp.mnum = mnum; - ip->comp.dev = dev; - - return (0); -} - -/* - * hotspare pool error - */ -int -mdhsperror( - md_error_t *ep, - md_hsp_errno_t errnum, - hsp_t hsp -) -{ - md_hsp_error_t *ip = &ep->info.md_error_info_t_u.hsp_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_HSP; - ip->errnum = errnum; - ip->hsp = hsp; - - return (0); -} - -/* - * hotspare error - */ -int -mdhserror( - md_error_t *ep, - md_hs_errno_t errnum, - hsp_t hsp, - md_dev64_t dev -) -{ - md_hs_error_t *ip = &ep->info.md_error_info_t_u.hs_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_HS; - ip->errnum = errnum; - ip->hs.hsp = hsp; - ip->hs.dev = dev; - - return (0); -} - -/* - * MDDB error - */ -int -mdmddberror( - md_error_t *ep, - md_mddb_errno_t errnum, - minor_t mnum, - set_t setno -) -{ - md_mddb_error_t *ip = &ep->info.md_error_info_t_u.mddb_error; - - mdclrerror(ep); - ep->info.errclass = MDEC_MDDB; - ip->errnum = errnum; - ip->mnum = mnum; - ip->setno = setno; - - return (0); -} - -int -mddbstatus2error( - md_error_t *ep, - int status, - minor_t mnum, - set_t setno -) -{ - md_mddb_errno_t errnum; - - switch (status) { - case MDDB_E_INVALID: - errnum = MDE_DB_INVALID; - break; - case MDDB_E_EXISTS: - errnum = MDE_DB_EXISTS; - break; - case MDDB_E_MASTER: - errnum = MDE_DB_MASTER; - break; - case MDDB_E_TOOSMALL: - errnum = MDE_DB_TOOSMALL; - break; - case MDDB_E_NORECORD: - errnum = MDE_DB_NORECORD; - break; - case MDDB_E_NOSPACE: - errnum = MDE_DB_NOSPACE; - break; - case MDDB_E_NOTNOW: - errnum = MDE_DB_NOTNOW; - break; - case MDDB_E_NODB: - errnum = MDE_DB_NODB; - break; - case MDDB_E_NOTOWNER: - errnum = MDE_DB_NOTOWNER; - break; - case MDDB_E_STALE: - errnum = MDE_DB_STALE; - break; - case MDDB_E_TOOFEW: - errnum = MDE_DB_TOOFEW; - break; - case MDDB_E_TAGDATA: - errnum = MDE_DB_TAGDATA; - break; - case MDDB_E_ACCOK: - errnum = MDE_DB_ACCOK; - break; - case MDDB_E_NTAGDATA: - errnum = MDE_DB_NTAGDATA; - break; - case MDDB_E_ACCNOTOK: - errnum = MDE_DB_ACCNOTOK; - break; - case MDDB_E_NOLOCBLK: - errnum = MDE_DB_NOLOCBLK; - break; - case MDDB_E_NOLOCNMS: - errnum = MDE_DB_NOLOCNMS; - break; - case MDDB_E_NODIRBLK: - errnum = MDE_DB_NODIRBLK; - break; - case MDDB_E_NOTAGREC: - errnum = MDE_DB_NOTAGREC; - break; - case MDDB_E_NOTAG: - errnum = MDE_DB_NOTAG; - break; - default: - ASSERT(0); - errnum = (md_mddb_errno_t)status; - break; - } - return (mdmddberror(ep, errnum, mnum, setno)); -} diff --git a/usr/src/uts/common/io/lvm/md/md_ioctl.c b/usr/src/uts/common/io/lvm/md/md_ioctl.c deleted file mode 100644 index 55ed85e22488..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_ioctl.c +++ /dev/null @@ -1,4280 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * Driver for Virtual Disk. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -extern md_ops_t **md_ops; -extern unit_t md_nunits; -extern set_t md_nsets; -extern int md_nmedh; -extern md_set_t md_set[]; -extern md_set_io_t md_set_io[]; -extern int md_status; -extern int md_ioctl_cnt; -extern int md_in_upgrade; -extern major_t md_major; - -/* md.c */ -extern kmutex_t md_mx; -extern kcondvar_t md_cv; - -/* md_hotspares.c */ -extern hot_spare_pool_t *find_hot_spare_pool(set_t setno, int hsp_id); - -/* md_med.c */ -extern int med_addr_tab_nents; -extern int med_get_t_size_ioctl(mddb_med_t_parm_t *tpp, int mode); -extern int med_get_t_ioctl(mddb_med_t_parm_t *tpp, int mode); -extern int med_set_t_ioctl(mddb_med_t_parm_t *tpp, int mode); -extern unit_t md_get_nextunit(set_t setno); - -/* md_mddb.c */ -extern mddb_set_t *mddb_setenter(set_t setno, int flag, int *errorcodep); -extern void mddb_setexit(mddb_set_t *s); -extern md_krwlock_t nm_lock; - -#define MD_MN_COMMD_CMD "rpc.mdcommd" -static pid_t md_mn_commd_pid; - -/* - * md_mn_is_commd_present: - * ---------------------- - * Determine if commd is running on this node. - * - * If md_mn_commd_pid is 0, trust it. Otherwise, do some in-depth checking - * to make sure it's still the one we originally set up by checking the - * provided PID's u_comm for the right program name in u_comm. - * - * This one's intended for the "something went awry" cases, and not for - * general use, due to its higher cost for the good/normal case. - */ -int -md_mn_is_commd_present(void) -{ - proc_t *commd_procp; - - if (md_mn_commd_pid == (pid_t)0) { - return (0); - } - - /* some in-depth checking */ - mutex_enter(&pidlock); - if ((commd_procp = prfind(md_mn_commd_pid)) != NULL && - strncmp(commd_procp->p_user.u_comm, - MD_MN_COMMD_CMD, strlen(MD_MN_COMMD_CMD)) == 0) { - mutex_exit(&pidlock); - /* - * returns a little more info than asked for, but it will - * never be PID 0 when valid. - */ - return ((int)md_mn_commd_pid); - } - /* if it's not there, make sure we only do these contortions once */ - md_mn_commd_pid = (pid_t)0; - mutex_exit(&pidlock); - - cmn_err(CE_WARN, "!rpc.mdcommd exited abnormally"); - return (0); -} - -/* - * This version merely checks the PID value that was set via an ioctl. - * It's intended to be used in the main code flow, where performance is - * critical, and accuracy can be sacrificed a little. If something is - * already known to be wrong, don't use this, but use - * md_mn_is_commd_present() instead. - */ -int -md_mn_is_commd_present_lite(void) -{ - return ((int)md_mn_commd_pid); -} - -/* - * md_mn_clear_commd_present: - * ------------------------- - * Clear the md_mn_commd_pid. Called only from a CPR request to suspend / - * terminate a resync thread. We clear the md_mn_commd_pid so that - * any RPC request that was in transit can complete with a failure and _not_ - * result in an unexpected system panic. - */ -void -md_mn_clear_commd_present() -{ - md_mn_commd_pid = (pid_t)0; -} - -/* - * It is possible to pass in a minor number via the ioctl interface - * and this minor number is used to reference elements in arrays. - * Therefore we need to make sure that the value passed in is - * correct within the array sizes, and array dereference. Not - * doing so allows for incorrect values which may result in panics. - */ -static int -verify_minor(minor_t mnum) -{ - set_t setno = MD_MIN2SET(mnum); - - /* - * Check the bounds. - */ - if (setno >= md_nsets || (MD_MIN2UNIT(mnum) >= md_nunits)) { - return (EINVAL); - } - - /* has the set been initialised ? */ - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - return (0); -} - -static int -get_lb_inittime_ioctl( - mddb_config_t *cp -) -{ - set_t setno = cp->c_setno; - int err; - mddb_set_t *s; - - if (setno >= md_nsets) - return (-1); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (-1); - - if (s->s_lbp == NULL) { - mddb_setexit(s); - return (-1); - } - - cp->c_timestamp = s->s_lbp->lb_inittime; - - mddb_setexit(s); - return (0); -} - -static int -setnm_ioctl(mdnm_params_t *nm, int mode) -{ - char *name, *minorname = NULL; - side_t side; - int err = 0; - void *devid = NULL; - int devid_sz; - - /* - * Don't allow addition of new names to namespace during upgrade. - */ - if (MD_UPGRADE) { - return (EAGAIN); - } - - mdclrerror(&nm->mde); - - if ((mode & FWRITE) == 0) - return (EACCES); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (md_get_setstatus(nm->setno) & MD_SET_STALE) - return (mdmddberror(&nm->mde, MDE_DB_STALE, NODEV32, - nm->setno)); - - name = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - err = ddi_copyin((caddr_t)(uintptr_t)nm->devname, name, - (size_t)nm->devname_len, mode); - if (err) { - err = EFAULT; - goto out; - } - - if (nm->imp_flag) { - if ((nm->devid == NULL) || (nm->minorname == NULL)) { - err = EINVAL; - goto out; - } - if (nm->devid) { - devid_sz = nm->devid_size; - devid = kmem_zalloc(devid_sz, KM_SLEEP); - err = ddi_copyin((caddr_t)(uintptr_t)nm->devid, - devid, devid_sz, mode); - if (err) { - err = EFAULT; - goto out; - } - } - if (nm->minorname) { - if (nm->minorname_len > MAXPATHLEN) { - err = EINVAL; - goto out; - } - minorname = kmem_zalloc(nm->minorname_len, KM_SLEEP); - err = ddi_copyin((caddr_t)(uintptr_t)nm->minorname, - minorname, (size_t)nm->minorname_len, mode); - if (err) { - err = EFAULT; - goto out; - } - } - } - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - if (strcmp(nm->drvnm, "") == 0) { - char *drvnm; - drvnm = ddi_major_to_name(nm->major); - (void) strncpy(nm->drvnm, drvnm, sizeof (nm->drvnm)); - } - - nm->key = md_setdevname(nm->setno, side, nm->key, nm->drvnm, - nm->mnum, name, nm->imp_flag, (ddi_devid_t)devid, minorname, - 0, &nm->mde); - /* - * If we got an error from md_setdevname & md_setdevname did not - * set the error code, we'll default to MDE_DB_NOSPACE. - */ - if ((((int)nm->key) < 0) && mdisok(&nm->mde)) { - err = mdmddberror(&nm->mde, MDE_DB_NOSPACE, NODEV32, nm->setno); - goto out; - } - -out: - kmem_free(name, MAXPATHLEN); - if (devid) { - kmem_free(devid, devid_sz); - } - if (minorname) - kmem_free(minorname, nm->minorname_len); - return (err); -} - -static int -getnm_ioctl( - mdnm_params_t *nm, - int mode -) -{ - char *name; - side_t side; - md_dev64_t dev = NODEV64; - mdc_unit_t *un; - uint_t id; - char *setname; - int err = 0; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - - name = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - if (nm->drvnm[0] == '\0') { - char *drvnm; - - if (MD_UPGRADE) - drvnm = md_targ_major_to_name(nm->major); - else - drvnm = ddi_major_to_name(nm->major); - if (drvnm != NULL) - (void) strncpy(nm->drvnm, drvnm, sizeof (nm->drvnm)); - } - - if (nm->drvnm[0] != '\0') { - if (MD_UPGRADE) - dev = md_makedevice(md_targ_name_to_major(nm->drvnm), - nm->mnum); - else - dev = md_makedevice(ddi_name_to_major(nm->drvnm), - nm->mnum); - } - - /* - * With the introduction of friendly names, all friendly named - * metadevices will have an entry in the name space. However, - * systems upgraded from pre-friendly name to a friendly name - * release won't have name space entries for pre-friendly name - * top level metadevices. - * - * So we search the name space for the our entry with either the - * given dev_t or key. If we can't find the entry, we'll try the - * un array to get information for our target metadevice. Note - * we only use the un array when searching by dev_t since a - * key implies an existing device which should have been - * found in the name space with the call md_getdevname. - */ - if (md_getdevname(nm->setno, side, nm->key, dev, name, - MAXPATHLEN) == 0) { - err = md_getnment(nm->setno, side, nm->key, dev, nm->drvnm, - sizeof (nm->drvnm), &nm->major, &nm->mnum, &nm->retkey); - if (err) { - if (err < 0) - err = EINVAL; - goto out; - } - } else { - if ((nm->key != MD_KEYWILD) || - (md_set[MD_MIN2SET(nm->mnum)].s_un == NULL) || - (MD_UNIT(nm->mnum) == NULL)) { - err = ENOENT; - goto out; - } - - /* - * We're here because the mnum is of a pre-friendly - * name device. Make sure the major value is for - * metadevices. - */ - if (nm->major != md_major) { - err = ENOENT; - goto out; - } - - /* - * get the unit number and setname to construct the - * fully qualified name for the metadevice. - */ - un = MD_UNIT(nm->mnum); - id = MD_MIN2UNIT(un->un_self_id); - if (nm->setno != MD_LOCAL_SET) { - setname = mddb_getsetname(nm->setno); - (void) snprintf(name, MAXPATHLEN, - "/dev/md/%s/dsk/d%u", setname, id); - } else { - (void) snprintf(name, MAXPATHLEN, - "/dev/md/dsk/d%u", id); - } - } - - err = ddi_copyout(name, (caddr_t)(uintptr_t)nm->devname, - strlen(name) + 1, mode); - if (err) { - err = EFAULT; - goto out; - } - -out: - kmem_free(name, MAXPATHLEN); - return (err); -} - -static int -gethspnm_ioctl( - mdhspnm_params_t *nm, - int mode -) -{ - char *name; - char *tmpname; - char *setname = NULL; - side_t side; - hot_spare_pool_t *hsp = NULL; - mdkey_t key = MD_KEYWILD; - int err = 0; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - /* - * Get the key from input hspid, use different macros - * since the hspid could be either a FN or pre-FN hspid. - */ - if (nm->hspid != MD_HSPID_WILD) { - if (HSP_ID_IS_FN(nm->hspid)) - key = HSP_ID_TO_KEY(nm->hspid); - else - key = HSP_ID(nm->hspid); - } - - /* - * Get the input name if we're searching by hsp name. Check - * that the input name length is less than MAXPATHLEN. - */ - if ((nm->hspid == MD_HSPID_WILD) && - (nm->hspname_len <= MAXPATHLEN)) { - err = ddi_copyin((caddr_t)(uintptr_t)nm->hspname, - name, (sizeof (char)) * nm->hspname_len, mode); - - /* Stop if ddi_copyin failed. */ - if (err) { - err = EFAULT; - goto out; - } - } - - /* Must have either a valid hspid or a name to continue */ - if ((nm->hspid == MD_HSPID_WILD) && (name[0] == '\0')) { - err = EINVAL; - goto out; - } - - /* - * Try to find the hsp namespace entry corresponds to either - * the given hspid or name. If we can't find it, the hsp maybe - * a pre-friendly name hsp so we'll try to find it in the - * s_hsp array. - */ - if ((nm->hspid == MD_HSPID_WILD) || (HSP_ID_IS_FN(nm->hspid))) { - - if (md_gethspinfo(nm->setno, side, key, nm->drvnm, - &nm->ret_hspid, name) != 0) { - /* - * If we were given a key for a FN hsp and - * couldn't find its entry, simply errored - * out. - */ - if (HSP_ID_IS_FN(nm->hspid)) { - err = ENOENT; - goto out; - } - - /* - * Since md_gethspinfo failed and the hspid is - * not a FN hspid, we must have a name for a - * pre-FN hotspare pool - */ - if (name[0] == '\0') { - err = EINVAL; - goto out; - } - - tmpname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - if (nm->setno != MD_LOCAL_SET) - setname = mddb_getsetname(nm->setno); - - hsp = (hot_spare_pool_t *)md_set[nm->setno].s_hsp; - while (hsp != NULL) { - /* Only use the pre-friendly name hsp */ - if (!(hsp->hsp_revision & MD_FN_META_DEV)) { - - if (setname != NULL) { - (void) snprintf(tmpname, - MAXPATHLEN, - "%s/hsp%03u", setname, - HSP_ID(hsp->hsp_self_id)); - } else { - (void) snprintf(tmpname, - MAXPATHLEN, "hsp%03u", - HSP_ID(hsp->hsp_self_id)); - } - - if (strcmp(name, tmpname) == 0) - break; - } - - hsp = hsp->hsp_next; - } - kmem_free(tmpname, MAXPATHLEN); - - if (hsp == NULL) { - err = ENOENT; - goto out; - } - - /* Return hsp_self_id */ - nm->ret_hspid = hsp->hsp_self_id; - } - - } else { - /* - * We have a hspid for a pre-FN hotspare pool. Let's - * try to find the matching hsp using the given - * hspid. - */ - if (nm->hspid == MD_HSPID_WILD) { - err = ENOENT; - goto out; - } - - hsp = (hot_spare_pool_t *)md_set[nm->setno].s_hsp; - while (hsp != NULL) { - if (hsp->hsp_self_id == nm->hspid) - break; - hsp = hsp->hsp_next; - } - - if (hsp == NULL) { - err = ENOENT; - goto out; - } - - /* Prepare a name to return */ - if (nm->setno != MD_LOCAL_SET) - setname = mddb_getsetname(nm->setno); - - if (setname != NULL) { - (void) snprintf(name, MAXPATHLEN, "%s/hsp%03u", - setname, HSP_ID(hsp->hsp_self_id)); - } else { - (void) snprintf(name, MAXPATHLEN, "hsp%03u", - HSP_ID(hsp->hsp_self_id)); - } - - nm->ret_hspid = hsp->hsp_self_id; - } - - if (nm->hspid != MD_HSPID_WILD) { - if ((strlen(name) + 1) > nm->hspname_len) { - err = EINVAL; - goto out; - } - err = ddi_copyout(name, (caddr_t) - (uintptr_t)nm->hspname, strlen(name)+1, mode); - } - - if (err) { - if (err < 0) - err = EINVAL; - } - -out: - kmem_free(name, MAXPATHLEN); - return (err); -} - - -/*ARGSUSED*/ -static int -update_loc_namespace_ioctl( - mdnm_params_t *nm, - char *dname, - char *pname, - int mode -) -{ - - side_t side; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (MD_MNSET_SETNO(nm->setno)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_STALE)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - return (md_update_locator_namespace(nm->setno, side, dname, - pname, nm->devt)); -} - -/*ARGSUSED*/ -static int -update_namespace_did_ioctl( - mdnm_params_t *nm, - int mode -) -{ - side_t side; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (MD_MNSET_SETNO(nm->setno)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_STALE)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - return (md_update_namespace_did(nm->setno, side, nm->key, &nm->mde)); -} - -/*ARGSUSED*/ -static int -update_namespace_ioctl( - mdnm_params_t *nm, - char *dname, - char *pname, - int mode -) -{ - side_t side; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (MD_MNSET_SETNO(nm->setno)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_STALE)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - return (md_update_namespace(nm->setno, side, nm->key, - dname, pname, nm->major, nm->mnum)); - -} - -/*ARGSUSED*/ -static int -getnextkey_ioctl( - mdnm_params_t *nm, - int mode -) -{ - side_t side; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (nm->setno >= md_nsets) - return (EINVAL); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - nm->key = md_getnextkey(nm->setno, side, nm->key, &nm->ref_count); - return (0); -} - -/*ARGSUSED*/ -static int -remnm_ioctl(mdnm_params_t *nm, int mode) -{ - side_t side; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (nm->side == -1) - side = mddb_getsidenum(nm->setno); - else - side = nm->side; - - return (md_remdevname(nm->setno, side, nm->key)); -} - - -/*ARGSUSED*/ -static int -getdrvnm_ioctl(md_dev64_t dev, md_i_driverinfo_t *di, int mode) -{ - mdi_unit_t *ui; - minor_t mnum = di->mnum; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&di->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &di->mde) != 0) - return (0); - - ui = MDI_UNIT(mnum); - if (ui == NULL) { - return (mdmderror(&di->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - - MD_SETDRIVERNAME(di, md_ops[ui->ui_opsindex]->md_driver.md_drivername, - setno); - - return (0); -} - -/*ARGSUSED*/ -static int -getnext_ioctl(md_i_getnext_t *gn, int mode) -{ - int modindex; - md_link_t *next; - uint_t id; - int found = 0; - set_t setno = gn->md_driver.md_setno; - - mdclrerror(&gn->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &gn->mde) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) { - if (md_get_setstatus(setno) & MD_SET_TAGDATA) - return (mdmddberror(&gn->mde, MDE_DB_TAGDATA, - NODEV32, setno)); - else - return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND)); - } - - modindex = md_getmodindex((md_driver_t *)gn, 1, 0); - if (modindex == -1) { - return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND)); - } - - rw_enter(&md_ops[modindex]->md_link_rw.lock, RW_READER); - id = gn->id; - next = md_ops[modindex]->md_head; - while (next) { - if ((next->ln_setno == setno) && (next->ln_id == id)) { - gn->id = id; - found = 1; - break; - } - - if ((next->ln_setno == setno) &&(next->ln_id > id) && - (! found || (next->ln_id < gn->id))) { - gn->id = next->ln_id; - found = 1; - /* continue looking for smallest */ - } - next = next->ln_next; - } - rw_exit(&md_ops[modindex]->md_link_rw.lock); - - if (! found) - return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND)); - - return (0); -} - -/*ARGSUSED*/ -static int -getnum_ioctl(void *d, int mode) -{ - int modindex; - md_link_t *next; - int sz; - minor_t *minors; - minor_t *m_ptr; - set_t setno; - int err = 0; - md_error_t *mdep; - int minor_array_length; - md_driver_t *driver; - int count = 0; - struct md_i_getnum *gn = d; - - - /* number of specified devices in specified set - if 0 return count */ - minor_array_length = gn->size; - if (minor_array_length > md_nunits) - return (EINVAL); - - mdep = &gn->mde; - driver = &gn->md_driver; - setno = driver->md_setno; - - mdclrerror(mdep); - - if (md_snarf_db_set(MD_LOCAL_SET, mdep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) { - if (md_get_setstatus(setno) & MD_SET_TAGDATA) { - return (mdmddberror(mdep, MDE_DB_TAGDATA, - NODEV32, setno)); - } else { - return (mderror(mdep, MDE_UNIT_NOT_FOUND)); - } - } - - modindex = md_getmodindex(driver, 0, 0); - if (modindex == -1) { - - return (mderror(mdep, MDE_UNIT_NOT_FOUND)); - } - - /* if array length is not 0 then allocate the output buffers */ - if (minor_array_length != 0) { - sz = minor_array_length * ((int)sizeof (minor_t)); - minors = kmem_zalloc(sz, KM_SLEEP); - m_ptr = minors; - } - - rw_enter(&md_ops[modindex]->md_link_rw.lock, RW_READER); - next = md_ops[modindex]->md_head; - count = 0; - while (next) { - if (next->ln_setno == setno) { - if ((minor_array_length > 0) && - (count < minor_array_length)) { - *m_ptr = next->ln_id; - m_ptr++; - } - count++; - } - next = next->ln_next; - } - rw_exit(&md_ops[modindex]->md_link_rw.lock); - - gn->size = count; - /* now copy the array back */ - if (minor_array_length > 0) { - err = ddi_copyout(minors, - (caddr_t)(uintptr_t)gn->minors, sz, mode); - kmem_free(minors, sz); - } - - return (err); -} - -/*ARGSUSED*/ -static int -didstat_ioctl( - md_i_didstat_t *ds -) -{ - int cnt = 0; - int err = 0; - - mdclrerror(&ds->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &ds->mde) != 0) - return (0); - - if (ds->setno >= md_nsets) { - return (EINVAL); - } - - if ((md_get_setstatus(ds->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - if (ds->mode == MD_FIND_INVDID) { - cnt = md_validate_devid(ds->setno, ds->side, &ds->maxsz); - if (cnt == -1) - err = -1; - ds->cnt = cnt; - } else if (ds->mode == MD_GET_INVDID) { - if (md_get_invdid(ds->setno, ds->side, ds->cnt, ds->maxsz, - (caddr_t)(uintptr_t)ds->ctdp) == -1) { - err = -1; - } - } else { - /* invalid mode */ - err = EINVAL; - } - - return (err); -} - -/*ARGSUSED*/ -static int -getdid_ioctl( - mdnm_params_t *nm, - int mode -) -{ - int err = 0; - ddi_devid_t did = NULL; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (nm->setno >= md_nsets) { - return (EINVAL); - } - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - /* - * Tell user that replica is not in devid mode - */ - if (!(((mddb_set_t *)md_set[nm->setno].s_db)->s_lbp->lb_flags - & MDDB_DEVID_STYLE) && md_keep_repl_state) { - return (mdsyserror(&nm->mde, MDDB_F_NODEVID)); - } - - /* - * If user is prepared to receive the devid allocate a kernel buffer. - */ - if (nm->devid_size != 0) { - /* check for bogus value of devid_size */ - if (nm->devid_size > MAXPATHLEN) { - return (EINVAL); - } - did = kmem_alloc(nm->devid_size, KM_SLEEP); - } - - err = md_getdevid(nm->setno, nm->side, nm->key, did, &nm->devid_size); - - if (err) { - if (err < 0) - err = EINVAL; - goto out; - } - - /* - * If devid size was already known to user then give them the devid. - */ - if (did != NULL) - err = ddi_copyout(did, - (caddr_t)(uintptr_t)nm->devid, nm->devid_size, mode); - -out: - if (did != NULL) - kmem_free(did, nm->devid_size); - return (err); -} - -int -mddb_setmaster_ioctl(mddb_setmaster_config_t *info) -{ - /* Verify that setno is in valid range */ - if (info->c_setno >= md_nsets) - return (EINVAL); - - /* - * When adding the first disk to a MN diskset, the master - * needs to be set (in order to write out the mddb) - * before the set is snarfed or even before the set - * is marked as a MNset in the md_set structure. - * So, don't check for MNset or SNARFED and don't call - * mddb_setenter. In order to discourage bad ioctl calls, - * verify that magic field in structure is set correctly. - */ - if (info->c_magic != MDDB_SETMASTER_MAGIC) - return (EINVAL); - - if (info->c_current_host_master) - md_set[info->c_setno].s_am_i_master = 1; - else - md_set[info->c_setno].s_am_i_master = 0; - - return (0); -} - -/* - * Set the devid for the namespace record identified by the tuple - * [setno, sideno, key]. The key is the namespace key. The md_getdevnum() - * function is used to actually regenerate the devid. - */ -/*ARGSUSED*/ -static int -setdid_ioctl( - mdnm_params_t *nm, - int mode -) -{ - dev_t devt; - - /* - * If upgrading do not allow modification of the namespace. - */ - if (MD_UPGRADE) - return (EAGAIN); - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (nm->setno >= md_nsets) - return (EINVAL); - - if (MD_MNSET_SETNO(nm->setno)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - devt = md_dev64_to_dev( - md_getdevnum(nm->setno, nm->side, nm->key, MD_TRUST_DEVT)); - - if (devt == NODEV) - return (ENODEV); - - return (0); -} - -/*ARGSUSED*/ -static int -getdidmin_ioctl( - mdnm_params_t *nm, - int mode -) -{ - int err = 0; - char *minorname = NULL; - - mdclrerror(&nm->mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0) - return (0); - - if (nm->setno >= md_nsets) - return (EINVAL); - - if (MD_MNSET_SETNO(nm->setno)) - return (0); - - if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - minorname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - if (nm->side == -1) { - err = EINVAL; - goto out; - } - - err = md_getdevidminor(nm->setno, nm->side, nm->key, minorname, - MAXPATHLEN); - - if (err) { - if (err < 0) - err = EINVAL; - goto out; - } - - err = ddi_copyout(minorname, (caddr_t)(uintptr_t)nm->minorname, - strlen(minorname) + 1, mode); - -out: - - kmem_free(minorname, MAXPATHLEN); - return (err); -} - -static int -mddb_userreq_ioctl(mddb_userreq_t *ur, int mode) -{ - void *data; - int status; - mddb_recid_t *recids; - int flags; - - if (ur->ur_setno >= md_nsets) - return (EINVAL); - - mdclrerror(&ur->ur_mde); - - if (md_snarf_db_set(MD_LOCAL_SET, &ur->ur_mde) != 0) - return (0); - - if ((md_get_setstatus(ur->ur_setno) & MD_SET_SNARFED) == 0) - return (ENODEV); - - switch (ur->ur_cmd) { - case MD_DB_GETNEXTREC: - if (ur->ur_recid == 0) - ur->ur_recid = mddb_makerecid(ur->ur_setno, 0); - /* - * Is ur_recid a valid one ? - */ - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - - ur->ur_recid = mddb_getnextrec(ur->ur_recid, ur->ur_type, - ur->ur_type2); - if (ur->ur_recid > 0) { - ur->ur_type = mddb_getrectype1(ur->ur_recid); - ur->ur_type2 = mddb_getrectype2(ur->ur_recid); - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - } - break; - - case MD_DB_COMMIT_ONE: - /* - * Is ur_recid a valid one? - */ - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - if (ur->ur_recstat == MDDB_NORECORD) - return (ENXIO); - status = mddb_commitrec(ur->ur_recid); - /* - * For MN sets we panic if there are too few database replicas - * and we're attempting to add entries to the log. - */ - if (status != 0) { - if ((MD_MNSET_SETNO(ur->ur_setno) && - (ur->ur_type2 == MDDB_UR_LR)) && - (md_get_setstatus(ur->ur_setno) & MD_SET_TOOFEW)) { - cmn_err(CE_PANIC, - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } - return (mddbstatus2error(&ur->ur_mde, status, NODEV32, - ur->ur_setno)); - } - break; - - case MD_DB_COMMIT_MANY: - if (ur->ur_size <= 0) - return (EINVAL); - - data = kmem_alloc(ur->ur_size, KM_SLEEP); - - if (ddi_copyin((caddr_t)(uintptr_t)ur->ur_data, data, - (size_t)ur->ur_size, mode)) { - kmem_free(data, ur->ur_size); - return (EFAULT); - } - - recids = (mddb_recid_t *)data; - while (*recids != 0) { - /* - * Is recid a valid ? - */ - if (DBSET(*recids) < 0 || DBSET(*recids) >= md_nsets) { - kmem_free(data, ur->ur_size); - return (EINVAL); - } - ur->ur_recstat = mddb_getrecstatus(*recids++); - if (ur->ur_recstat == MDDB_NORECORD) { - kmem_free(data, ur->ur_size); - return (ENXIO); - } - } - status = mddb_commitrecs(data); - kmem_free(data, ur->ur_size); - /* - * For MN sets we panic if there are too few database replicas - * and we're attempting to add entries to the log. - */ - if (status != 0) { - if ((MD_MNSET_SETNO(ur->ur_setno) && - (ur->ur_type2 == MDDB_UR_LR)) && - (md_get_setstatus(ur->ur_setno) & MD_SET_TOOFEW)) { - cmn_err(CE_PANIC, - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } - return (mddbstatus2error(&ur->ur_mde, status, NODEV32, - ur->ur_setno)); - } - break; - - case MD_DB_GETDATA: - /* - * Check ur_recid - */ - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - if (ur->ur_recstat == MDDB_NORECORD || - ur->ur_recstat == MDDB_NODATA) - return (ENXIO); - - if (ur->ur_size > mddb_getrecsize(ur->ur_recid)) - return (EINVAL); - - data = mddb_getrecaddr(ur->ur_recid); - if (ddi_copyout(data, (caddr_t)(uintptr_t)ur->ur_data, - (size_t)ur->ur_size, mode)) { - return (EFAULT); - } - break; - - case MD_DB_SETDATA: - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - if (ur->ur_recstat == MDDB_NORECORD) - return (ENXIO); - - if (ur->ur_size > mddb_getrecsize(ur->ur_recid)) - return (EINVAL); - - data = mddb_getrecaddr(ur->ur_recid); - if (ddi_copyin((caddr_t)(uintptr_t)ur->ur_data, data, - (size_t)ur->ur_size, mode)) { - return (EFAULT); - } - break; - - case MD_DB_DELETE: - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - if (ur->ur_recstat == MDDB_NORECORD) - return (ENXIO); - status = mddb_deleterec(ur->ur_recid); - if (status < 0) - return (mddbstatus2error(&ur->ur_mde, status, NODEV32, - ur->ur_setno)); - break; - - case MD_DB_CREATE: - { - int mn_set = 0; - - if (md_get_setstatus(ur->ur_setno) & MD_SET_MNSET) - mn_set = 1; - - if (ur->ur_setno >= md_nsets) - return (EINVAL); - if ((mn_set) && (ur->ur_type2 == MDDB_UR_LR)) - flags = MD_CRO_32BIT | MD_CRO_CHANGELOG; - else - flags = MD_CRO_32BIT; - ur->ur_recid = mddb_createrec(ur->ur_size, ur->ur_type, - ur->ur_type2, flags, ur->ur_setno); - if (ur->ur_recid < 0) - return (mddbstatus2error(&ur->ur_mde, ur->ur_recid, - NODEV32, ur->ur_setno)); - break; - } - - case MD_DB_GETSTATUS: - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - ur->ur_recstat = mddb_getrecstatus(ur->ur_recid); - break; - - case MD_DB_GETSIZE: - if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets) - return (EINVAL); - ur->ur_size = mddb_getrecsize(ur->ur_recid); - break; - - case MD_DB_MAKEID: - if (ur->ur_setno >= md_nsets) - return (EINVAL); - ur->ur_recid = mddb_makerecid(ur->ur_setno, ur->ur_recid); - break; - - default: - return (EINVAL); - } - return (0); -} - -static int -setuserflags( - md_set_userflags_t *msu, - IOLOCK *lock -) -{ - minor_t mnum = msu->mnum; - set_t setno = MD_MIN2SET(mnum); - md_unit_t *un; - mdi_unit_t *ui; - - mdclrerror(&msu->mde); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(&msu->mde, MDE_DB_STALE, mnum, setno)); - - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(&msu->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - - un = (md_unit_t *)md_ioctl_writerlock(lock, ui); - - un->c.un_user_flags = msu->userflags; - mddb_commitrec_wrapper(un->c.un_record_id); - - return (0); -} - -/* - * mddb_didstat_from_user -- called for DIDSTAT ioctl. 2 different calling - * scenarios. - * 1) data->mode == MD_FIND_INVDID - * when user is inquiring about the existence of invalid device id's. - * Upon return to the user d->cnt may have a value in it. - * 2) data->mode == MD_GET_INVDID - * when the user wants a list of the invalid device id's. - * In this case d->ctdp is non Null and cnt has a value in it. - * - * Basically this routine along with mddb_didstat_to_user can be eliminated - * by pushing ddi_copyout down to lower level interfaces. To minimize impact - * just keep the current implementation intact. - */ -static int -mddb_didstat_from_user( - void **d, - caddr_t data, - int mode, - caddr_t *ds_ctd_addr -) -{ - size_t sz1 = 0, sz2 = 0; - md_i_didstat_t *d1; - void *d2; - *ds_ctd_addr = 0; - - sz1 = sizeof (md_i_didstat_t); - d1 = (md_i_didstat_t *)kmem_zalloc(sz1, KM_SLEEP); - - if (ddi_copyin(data, (void *)d1, sz1, mode) != 0) { - kmem_free((void *)d1, sz1); - return (EFAULT); - } - - /* - * ds_ctd_addr has actual user ctdp - */ - *ds_ctd_addr = (caddr_t)(uintptr_t)d1->ctdp; - if (d1->mode == MD_GET_INVDID) { - sz2 = (d1->cnt * d1->maxsz) + 1; - if (sz2 <= 0) { - kmem_free(d1, sz1); - return (EINVAL); - } - d2 = kmem_zalloc(sz2, KM_SLEEP); - d1->ctdp = (uint64_t)(uintptr_t)d2; - } else if (d1->mode != MD_FIND_INVDID) { - kmem_free(d1, sz1); - return (EINVAL); - } - *d = (void *)d1; - return (0); -} - -/* - * mddb_didstat_to_user -- see comment for mddb_didstat_from_user. In this - * case d->cnt could have a value in it for either usage of - * the ioctl. - */ -/*ARGSUSED*/ -static int -mddb_didstat_to_user( - void *d, - caddr_t data, - int mode, - caddr_t ds_ctd_addr -) -{ - size_t sz1 = 0, sz2 = 0; - md_i_didstat_t *d1; - void *d2; - - - d1 = (md_i_didstat_t *)d; - sz1 = sizeof (md_i_didstat_t); - - sz2 = (d1->cnt * d1->maxsz) + 1; - d2 = (caddr_t)(uintptr_t)d1->ctdp; - if (d2 && sz2) { - /* - * Copy out from kernel ctdp to user ctdp area - */ - if (ddi_copyout(d2, (caddr_t)ds_ctd_addr, sz2, mode) != 0) { - kmem_free(d1, sz1); - kmem_free(d2, sz2); - return (EFAULT); - } - d1->ctdp = (uint64_t)(uintptr_t)ds_ctd_addr; - } - if (ddi_copyout(d1, data, sz1, mode) != 0) { - kmem_free(d1, sz1); - if (sz2 && d2) - kmem_free(d2, sz2); - return (EFAULT); - } - kmem_free(d1, sz1); - if (sz2 && d2) - kmem_free(d2, sz2); - return (0); -} - - -static int -mddb_config_from_user( - void **d, - caddr_t data, - int mode, - caddr_t *c_devid_addr, - caddr_t *c_old_devid_addr -) -{ - size_t sz1 = 0, sz2 = 0, sz3 = 0; - mddb_config_t *d1; - void *d2; - void *d3; - - *c_devid_addr = 0; - - sz1 = sizeof (mddb_config_t); - d1 = (mddb_config_t *)kmem_zalloc(sz1, KM_SLEEP); - - if (ddi_copyin(data, (void *)d1, sz1, mode) != 0) { - kmem_free((void *)d1, sz1); - return (EFAULT); - } - *c_devid_addr = (caddr_t)(uintptr_t)d1->c_locator.l_devid; - - if (d1->c_locator.l_devid_flags & MDDB_DEVID_SPACE) { - sz2 = d1->c_locator.l_devid_sz; - if (d1->c_locator.l_devid_sz <= 0 || - d1->c_locator.l_devid_sz > MAXPATHLEN) { - kmem_free((void *)d1, sz1); - return (EINVAL); - } - d2 = kmem_zalloc(sz2, KM_SLEEP); - if (ddi_copyin((caddr_t)(uintptr_t)d1->c_locator.l_devid, - d2, sz2, mode) != 0) { - kmem_free(d1, sz1); - kmem_free(d2, sz2); - return (EFAULT); - } - d1->c_locator.l_devid = (uint64_t)(uintptr_t)d2; - - if ((caddr_t)(uintptr_t)d1->c_locator.l_old_devid) { - *c_old_devid_addr = (caddr_t)(uintptr_t) - d1->c_locator.l_old_devid; - - sz3 = d1->c_locator.l_old_devid_sz; - if (d1->c_locator.l_old_devid_sz <= 0 || - d1->c_locator.l_old_devid_sz > MAXPATHLEN) { - kmem_free((void *)d1, sz1); - kmem_free(d2, sz2); - return (EINVAL); - } - d3 = kmem_zalloc(sz3, KM_SLEEP); - if (ddi_copyin( - (caddr_t)(uintptr_t)d1->c_locator.l_old_devid, - d3, sz3, mode) != 0) { - kmem_free((void *)d1, sz1); - kmem_free(d2, sz2); - kmem_free(d3, sz3); - return (EFAULT); - } - d1->c_locator.l_old_devid = (uintptr_t)d3; - } - } else { - d1->c_locator.l_devid = (uint64_t)0; - d1->c_locator.l_old_devid = (uint64_t)0; - } - - *d = (void *)d1; - return (0); -} - -/*ARGSUSED*/ -static int -mddb_config_to_user( - void *d, - caddr_t data, - int mode, - caddr_t c_devid_addr, - caddr_t c_old_devid_addr -) -{ - size_t sz1 = 0, sz2 = 0, sz3 = 0; - mddb_config_t *d1; - void *d2; - void *d3; - - d1 = (mddb_config_t *)d; - sz1 = sizeof (mddb_config_t); - - if (d1->c_locator.l_devid_flags & MDDB_DEVID_SPACE) { - sz2 = d1->c_locator.l_devid_sz; - d2 = (caddr_t)(uintptr_t)d1->c_locator.l_devid; - /* Only copyout devid if valid */ - if (d1->c_locator.l_devid_flags & MDDB_DEVID_VALID) { - if (ddi_copyout(d2, (caddr_t)c_devid_addr, - sz2, mode) != 0) { - kmem_free(d1, sz1); - kmem_free(d2, sz2); - return (EFAULT); - } - } - } - - d1->c_locator.l_devid = (uint64_t)(uintptr_t)c_devid_addr; - - if (d1->c_locator.l_old_devid) { - sz3 = d1->c_locator.l_old_devid_sz; - d3 = (caddr_t)(uintptr_t)d1->c_locator.l_old_devid; - if (ddi_copyout(d3, (caddr_t)c_old_devid_addr, - sz3, mode) != 0) { - kmem_free(d1, sz1); - kmem_free(d2, sz2); - kmem_free(d3, sz3); - } - } - d1->c_locator.l_old_devid = (uintptr_t)c_old_devid_addr; - - if (ddi_copyout(d1, data, sz1, mode) != 0) { - kmem_free(d1, sz1); - if (sz2) - kmem_free(d2, sz2); - if (sz3) - kmem_free(d3, sz3); - return (EFAULT); - } - - if (d1) - kmem_free(d1, sz1); - if (sz2) - kmem_free(d2, sz2); - if (sz3) - kmem_free(d3, sz3); - - return (0); -} - -/* - * NAME: get_tstate - * PURPOSE: Return unit's transient error state to user. - * INPUT: device node (set + metadevice number) - * OUTPUT: gu->tstate - * RETURNS: 0 on success - * EINVAL on failure - */ -static int -get_tstate(md_i_get_tstate_t *gu, IOLOCK *lock) -{ - mdi_unit_t *ui; - - ui = MDI_UNIT(gu->id); - if (ui == (mdi_unit_t *)NULL) { - (void) mdmderror(&gu->mde, MDE_UNIT_NOT_SETUP, gu->id); - return (EINVAL); - } - - (void) md_ioctl_readerlock(lock, ui); - gu->tstate = ui->ui_tstate; - md_ioctl_readerexit(lock); - - return (0); -} - -/* - * NAME: md_clu_ioctl - * PURPOSE: depending on clu_cmd: - * - Check open state, - * - lock opens and check open state - * - unlock opens again - * INPUT: metadevice and clu_cmd - * OUTPUT: open state (for MD_MN_LCU_UNLOCK always 0) - * RETURNS: 0 on success - * EINVAL on failure - */ -int -md_clu_ioctl(md_clu_open_t *clu) -{ - mdi_unit_t *ui; - minor_t mnum; - - if ((clu->clu_dev <= 0) || - (md_getmajor(clu->clu_dev)) != md_major) { - return (EINVAL); - } - - mnum = md_getminor(clu->clu_dev); - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(&clu->clu_mde, MDE_UNIT_NOT_SETUP, mnum)); - } - - switch (clu->clu_cmd) { - case MD_MN_LCU_CHECK: - /* No lock here, just checking */ - clu->clu_isopen = md_unit_isopen(ui); - break; - case MD_MN_LCU_LOCK: - /* This inhibits later opens to succeed */ - ui->ui_tstate |= MD_OPENLOCKED; - clu->clu_isopen = md_unit_isopen(ui); - /* In case the md is opened, reset the lock immediately */ - if (clu->clu_isopen != 0) { - ui->ui_tstate &= ~MD_OPENLOCKED; - } - break; - case MD_MN_LCU_UNLOCK: - ui->ui_tstate &= ~MD_OPENLOCKED; - clu->clu_isopen = 0; /* always sucess */ - break; - } - return (0); -} - -/* - * NAME: mkdev_ioctl - * PURPOSE: Create device node for specified set / metadevice tuple - * INPUT: device tuple (set number + metadevice number) - * OUTPUT: None - * RETURNS: 0 on success - * EINVAL on failure - */ -static int -mkdev_ioctl(md_mkdev_params_t *p) -{ - set_t setno = p->md_driver.md_setno; - unit_t un; - - mdclrerror(&p->mde); - - /* Validate arguments passed in to ioctl */ - if (setno >= MD_MAXSETS) { - (void) mderror(&p->mde, MDE_NO_SET); - return (EINVAL); - } - - /* - * Get the next available unit number in this set - */ - un = md_get_nextunit(setno); - if (un == MD_UNITBAD) { - (void) mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, un); - return (ENODEV); - } - - /* Create the device node */ - if (md_create_minor_node(setno, un)) { - (void) mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, un); - return (ENODEV); - } - - /* Return the minor number */ - p->un = un; - - return (0); -} - -/* - * admin device ioctls - */ -static int -md_base_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp) -{ - size_t sz = 0; - void *d = NULL; - mddb_config_t *cp; - set_t setno; - int err = 0; - int err_to_user = 0; - int mddb_config_case = 0; - int mddb_didstat_case = 0; - caddr_t c_devid_addr = 0; - caddr_t c_old_devid_addr = 0; - caddr_t ds_ctd_addr = 0; - mddb_set_node_params_t *snp; - - /* For now we can only handle 32-bit clients for internal commands */ - if ((cmd != DKIOCINFO) && - ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32)) { - return (EINVAL); - } - - switch (cmd) { - - case DKIOCINFO: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (struct dk_cinfo); - d = kmem_alloc(sz, KM_SLEEP); - - get_info((struct dk_cinfo *)d, md_getminor(dev)); - break; - } - - case MD_DB_USEDEV: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_USEDEV, (mddb_config_t *)d); - break; - } - - case MD_DB_GETDEV: - { - if (! (mode & FREAD)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_GETDEV, (mddb_config_t *)d); - break; - } - - case MD_DB_GETDRVNM: - { - if (! (mode & FREAD)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_GETDRVRNAME, (mddb_config_t *)d); - break; - } - - case MD_DB_ENDDEV: - { - if (! (mode & FREAD)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_ENDDEV, (mddb_config_t *)d); - break; - } - - case MD_DB_DELDEV: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - cp = (mddb_config_t *)d; - setno = cp->c_setno; - err = mddb_configure(MDDB_DELDEV, cp); - if (! mdisok(&cp->c_mde)) - break; - - if (setno == MD_LOCAL_SET) - break; - - if (cp->c_dbcnt != 0) - break; - - /* - * if the last db replica of a diskset is deleted - * unload everything. - */ - - /* Requesting a release, clean up everything */ - md_clr_setstatus(setno, MD_SET_KEEPTAG); - - err = release_set(cp, mode); - - break; - } - - case MD_DB_NEWDEV: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - cp = (mddb_config_t *)d; - setno = cp->c_setno; - err = mddb_configure(MDDB_NEWDEV, cp); - if (! err && mdisok(&cp->c_mde)) - (void) md_snarf_db_set(setno, &cp->c_mde); - break; - } - - case MD_DB_NEWSIDE: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_NEWSIDE, (mddb_config_t *)d); - break; - } - - case MD_DB_DELSIDE: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = mddb_configure(MDDB_DELSIDE, (mddb_config_t *)d); - break; - } - - case MD_DB_SETDID: - { - if (!(mode & FWRITE)) { - return (EACCES); - } - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) { - return (err); - } - - err = mddb_configure(MDDB_SETDID, (mddb_config_t *)d); - - break; - } - - case MD_GRAB_SET: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - cp = (mddb_config_t *)d; - setno = cp->c_setno; - - err = take_set(cp, mode); - - if (err || ! mdisok(&cp->c_mde)) - break; - - if (md_get_setstatus(setno) & MD_SET_ACCOK) - err = mdmddberror(&cp->c_mde, MDE_DB_ACCOK, NODEV32, - setno); - - md_unblock_setio(setno); - break; - } - - case MD_RELEASE_SET: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - /* shorthand */ - cp = (mddb_config_t *)d; - setno = cp->c_setno; - - /* If the user requests a release, clean up everything */ - md_clr_setstatus(setno, MD_SET_KEEPTAG); - - /* Block incoming I/Os during release_set operation */ - if (MD_MNSET_SETNO(setno)) { - /* - * md_tas_block_setio will block the set if - * there are no outstanding I/O requests, - * otherwise it returns -1. - */ - if (md_tas_block_setio(setno) != 1) { - err = EBUSY; - break; - } - } else { - /* - * Should not return something other than 1 - */ - if (md_block_setio(setno) != 1) { - md_clearblock_setio(setno); - err = EACCES; - break; - } - } - - err = release_set(cp, mode); - - /* Always unblock I/O even if release_set fails */ - md_clearblock_setio(setno); - - break; - } - - case MD_DB_GETOPTLOC: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_optloc_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mddb_getoptloc((mddb_optloc_t *)d); - break; - } - - case MD_HALT: - { - if (! (mode & FWRITE)) - return (EACCES); - - /* already have the ioctl lock */ - return (md_halt(MD_GBL_IOCTL_LOCK)); - } - - case MD_IOCSET_NM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* check data integrity */ - if (((mdnm_params_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - - if ((((mdnm_params_t *)d)->devname_len == 0) || - (((mdnm_params_t *)d)->devname_len > MAXPATHLEN)) { - err = EINVAL; - break; - } - - if (((mdnm_params_t *)d)->devname == NULL) { - err = EINVAL; - break; - } - - err = setnm_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCGET_NM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* check data integrity */ - if (((mdnm_params_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - if (((mdnm_params_t *)d)->devname == NULL) { - err = EINVAL; - break; - } - - err = getnm_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCGET_HSP_NM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdhspnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* check data integrity */ - if (((mdhspnm_params_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - if (((mdhspnm_params_t *)d)->hspname == NULL) { - err = EINVAL; - break; - } - - err = gethspnm_ioctl((mdhspnm_params_t *)d, mode); - break; - } - - case MD_IOCNXTKEY_NM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = getnextkey_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCREM_NM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* check data integrity */ - if (((mdnm_params_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - - err = remnm_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCGET_TSTATE: - { - md_i_get_tstate_t *p; - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_tstate_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - p = (md_i_get_tstate_t *)d; - - if ((err = verify_minor(p->id)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, - p->id); - break; - } - - err = get_tstate(p, lockp); - break; - } - - case MD_IOCGET_DRVNM: - { - md_i_driverinfo_t *p; - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_driverinfo_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - p = (md_i_driverinfo_t *)d; - - /* check data integrity */ - if (p->md_driver.md_drivername == NULL) { - err = EINVAL; - break; - } - - if ((err = verify_minor(p->mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, - p->mnum); - break; - } - - err = getdrvnm_ioctl(dev, p, mode); - break; - } - - case MD_IOCGET_NEXT: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_getnext_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* check data integrity */ - if (((md_i_getnext_t *)d)->md_driver.md_setno >= md_nsets) { - err = EINVAL; - break; - } - - err = getnext_ioctl((md_i_getnext_t *)d, mode); - break; - } - - case MD_DB_USERREQ: - case MD_MN_DB_USERREQ: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_userreq_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - err = mddb_userreq_ioctl((mddb_userreq_t *)d, mode); - break; - } - - case MD_IOCGET_NUM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_getnum_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = getnum_ioctl(d, mode); - break; - } - - case MD_DB_OWNSET: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_ownset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - if (((mddb_ownset_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - - ((mddb_ownset_t *)d)->owns_set = - mddb_ownset(((mddb_ownset_t *)d)->setno); - - break; - } - - case MD_IOCGETNSET: - { - if (! (mode & FREAD)) - return (EACCES); - - if (ddi_copyout((caddr_t)&md_nsets, data, - sizeof (set_t), mode) != 0) { - err = EFAULT; - break; - } - break; - } - - case MD_IOCGETNUNITS: - { - if (! (mode & FREAD)) - return (EACCES); - - if (ddi_copyout((caddr_t)&md_nunits, data, - sizeof (set_t), mode) != 0) { - err = EFAULT; - break; - } - break; - } - - case MD_IOCGVERSION: - { - uint_t dversion = MD_DVERSION; - - if (! (mode & FREAD)) - return (EACCES); - - if (ddi_copyout((caddr_t)&dversion, data, - sizeof (dversion), mode) != 0) { - err = EFAULT; - break; - } - break; - } - - case MD_IOCSET_FLAGS: - { - md_set_userflags_t *p; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_userflags_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - p = (md_set_userflags_t *)d; - - if ((err = verify_minor(p->mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, - p->mnum); - break; - } - - err = setuserflags(p, lockp); - break; - } - - case MD_IOCRENAME: - { - md_rename_t *p; - - if (! (mode & FWRITE)) { - return (EACCES); - } - - sz = sizeof (md_rename_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - p = (md_rename_t *)d; - - if ((err = verify_minor(p->to.mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, - p->to.mnum); - break; - } - - if ((err = verify_minor(p->from.mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, - p->from.mnum); - break; - } - - err = md_rename(p, lockp); - break; - } - - case MD_IOCISOPEN: - { - md_isopen_t *p; - mdi_unit_t *ui; - minor_t mnum; - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_isopen_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - p = (md_isopen_t *)d; - if ((p->dev <= 0) || (md_getmajor(p->dev)) != md_major) { - err = EINVAL; - break; - } - - mnum = md_getminor(p->dev); - - if ((err = verify_minor(mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->mde, MDE_INVAL_UNIT, mnum); - break; - } - - if ((ui = MDI_UNIT(mnum)) == NULL) { - /* - * If the incore unit does not exist then rather - * than set err we need to set it to 0 because the - * multi-node code is expecting a return of - * 0 (from mdmderror() but with the mde structure - * filled with particular information - * (MDE_UNIT_NOT_SETUP). - */ - err = mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, mnum); - break; - } - - p->isopen = md_unit_isopen(ui); - break; - } - - case MD_MED_GET_LST: - { - mddb_med_parm_t *medpp; - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_med_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - medpp = (mddb_med_parm_t *)d; - - err = getmed_ioctl(medpp, mode); - break; - } - - case MD_MED_SET_LST: - { - mddb_med_parm_t *medpp; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_med_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - medpp = (mddb_med_parm_t *)d; - - err = setmed_ioctl(medpp, mode); - - break; - } - - case MD_MED_UPD_MED: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_med_upd_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = updmed_ioctl((mddb_med_upd_parm_t *)d, mode); - - break; - } - - case MD_MED_GET_NMED: - { - if (! (mode & FREAD)) - return (EACCES); - - if (ddi_copyout((caddr_t)&md_nmedh, data, - sizeof (int), mode) != 0) { - err = EFAULT; - break; - } - break; - } - - case MD_MED_GET_TAG: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_dtag_get_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = gettag_ioctl((mddb_dtag_get_parm_t *)d, mode); - - break; - } - - case MD_MED_USE_TAG: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_dtag_use_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = usetag_ioctl((mddb_dtag_use_parm_t *)d, mode); - - break; - } - - case MD_MED_ACCEPT: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_accept_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = accept_ioctl((mddb_accept_parm_t *)d, mode); - - break; - } - - case MD_MED_GET_TLEN: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_med_t_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = med_get_t_size_ioctl((mddb_med_t_parm_t *)d, mode); - - break; - } - - case MD_MED_GET_T: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = (sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t)) + - (sizeof (mddb_med_t_ent_t) * med_addr_tab_nents); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = med_get_t_ioctl((mddb_med_t_parm_t *)d, mode); - - break; - } - - case MD_MED_SET_T: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = (sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t)) + - (sizeof (mddb_med_t_ent_t) * med_addr_tab_nents); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = med_set_t_ioctl((mddb_med_t_parm_t *)d, mode); - - break; - } - - case MD_GET_SETSTAT: - { - md_gs_stat_parm_t *gsp; - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_gs_stat_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - gsp = (md_gs_stat_parm_t *)d; - - if (gsp->gs_setno > (md_nsets - 1)) { - err = EINVAL; - break; - } - - gsp->gs_status = md_set[gsp->gs_setno].s_status; - - break; - } - - case MD_SETNMDID: - { - if (!(mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = update_namespace_did_ioctl((mdnm_params_t *)d, mode); - break; - - } - case MD_IOCUPD_NM: - { - char *dname; - char *pname; - uint_t devnamelen, pathnamelen; - - if (!(mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - devnamelen = ((mdnm_params_t *)d)->devname_len; - pathnamelen = ((mdnm_params_t *)d)->pathname_len; - - if ((devnamelen > MAXPATHLEN) || (pathnamelen > MAXPATHLEN) || - (devnamelen == 0) || (pathnamelen == 0)) { - kmem_free(d, sz); - return (EINVAL); - } - - /* alloc memory for devname */ - dname = kmem_alloc(devnamelen + 1, KM_SLEEP); - - if (ddi_copyin( - (void *)(uintptr_t)((mdnm_params_t *)d)->devname, - (void *)dname, devnamelen + 1, mode) != 0) { - err = EFAULT; - kmem_free(dname, devnamelen + 1); - break; - } - - pname = kmem_alloc(pathnamelen + 1, KM_SLEEP); - - if (ddi_copyin( - (void *)(uintptr_t)((mdnm_params_t *)d)->pathname, - (void *)pname, pathnamelen + 1, mode) != 0) { - err = EFAULT; - kmem_free(dname, devnamelen + 1); - kmem_free(pname, pathnamelen + 1); - break; - } - - err = update_namespace_ioctl((mdnm_params_t *)d, dname, pname, - mode); - - kmem_free(dname, devnamelen + 1); - kmem_free(pname, pathnamelen + 1); - break; - } - - case MD_IOCUPD_LOCNM: - { - char *dname; - char *pname; - uint_t devnamelen, pathnamelen; - - if (!(mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - devnamelen = ((mdnm_params_t *)d)->devname_len; - pathnamelen = ((mdnm_params_t *)d)->pathname_len; - - if ((devnamelen > MAXPATHLEN) || (pathnamelen > MAXPATHLEN) || - (devnamelen == 0) || (pathnamelen == 0)) { - kmem_free(d, sz); - return (EINVAL); - } - - /* alloc memory for devname */ - dname = kmem_alloc(devnamelen + 1, KM_SLEEP); - - if (ddi_copyin( - (void *)(uintptr_t)((mdnm_params_t *)d)->devname, - (void *)dname, devnamelen + 1, mode) != 0) { - err = EFAULT; - kmem_free(dname, devnamelen + 1); - break; - } - - pname = kmem_alloc(pathnamelen + 1, KM_SLEEP); - - if (ddi_copyin( - (void *)(uintptr_t)((mdnm_params_t *)d)->pathname, - (void *)pname, pathnamelen + 1, mode) != 0) { - err = EFAULT; - kmem_free(dname, devnamelen + 1); - kmem_free(pname, pathnamelen + 1); - break; - } - - err = update_loc_namespace_ioctl((mdnm_params_t *)d, dname, - pname, mode); - - kmem_free(dname, devnamelen + 1); - kmem_free(pname, pathnamelen + 1); - break; - } - - case MD_SET_SETSTAT: - { -#ifdef DEBUG - /* Can be used to set the s_status flags from user code */ - md_gs_stat_parm_t *gsp; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_gs_stat_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - gsp = (md_gs_stat_parm_t *)d; - - if (gsp->gs_setno > (md_nsets - 1)) { - err = EINVAL; - break; - } - - md_set[gsp->gs_setno].s_status = gsp->gs_status; - -#endif /* DEBUG */ - break; - } - - case MD_IOCGET_DID: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = getdid_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCSET_DID: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = setdid_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCGET_DIDMIN: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mdnm_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - if (((mdnm_params_t *)d)->setno >= md_nsets) { - err = EINVAL; - break; - } - - err = getdidmin_ioctl((mdnm_params_t *)d, mode); - break; - } - - case MD_IOCDID_STAT: - { - if (!(mode & FREAD)) - return (EACCES); - - mddb_didstat_case = 1; - - err = mddb_didstat_from_user(&d, data, mode, &ds_ctd_addr); - - if (err) { - return (err); - } - - err = didstat_ioctl((md_i_didstat_t *)d); - break; - } - - case MD_UPGRADE_STAT: - { - if (! (mode & FREAD)) - return (EACCES); - - if (ddi_copyout((caddr_t)&md_in_upgrade, data, - sizeof (int), mode) != 0) { - err = EFAULT; - break; - } - break; - } - - case MD_SETMASTER: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_setmaster_config_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mddb_setmaster_ioctl((mddb_setmaster_config_t *)d); - break; - } - - case MD_MN_SET_DOORH: - { - /* This ioctl sets the global kernel variable mdmn_door_handle */ - if (ddi_copyin(data, &mdmn_door_did, sizeof (int), mode) != 0) { - err = EFAULT; - } else { - err = 0; - } - mdmn_door_handle = door_ki_lookup(mdmn_door_did); - - break; - } - -#ifdef DEBUG - case MD_MN_CHECK_DOOR1: - { - /* This ioctl sends a message through a previously opened door */ - int ret; - int msg_test = 11111111; - int nloops = 0; - set_t setno; - md_mn_kresult_t *result; - uint_t flags = MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST; - - result = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP); - if (ddi_copyin(data, &nloops, sizeof (int), mode) != 0) { - err = EFAULT; - } else { - err = 0; - } - - /* - * This is a way to tell ksend_message() to use different sets. - * Odd numbers go to set 1 even numbers go to set 2 - */ - if (nloops & 0x1) { - setno = 1; - } else { - setno = 2; - } - while (nloops--) { - ret = mdmn_ksend_message( - setno, - MD_MN_MSG_TEST1, - flags, - 0, - (char *)&msg_test, - sizeof (msg_test), - result); - - if (ret != 0) { - printf("mdmn_ksend_message failed (%d)\n", ret); - } - } - kmem_free(result, sizeof (md_mn_kresult_t)); - - break; - } - - case MD_MN_CHECK_DOOR2: - { - /* This ioctl sends a message through a previously opened door */ - int ret; - int msg_test = 22222222; - int nloops = 0; - md_mn_kresult_t *result; - set_t setno; - uint_t flags = MD_MSGF_NO_LOG; - - result = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP); - if (ddi_copyin(data, &nloops, sizeof (int), mode) != 0) { - err = EFAULT; - } else { - err = 0; - } - /* - * This is a way to tell ksend_message() to use different sets. - * Odd numbers go to set 1 even numbers go to set 2 - */ - if (nloops & 0x1) { - setno = 1; - } else { - setno = 2; - } - while (nloops--) { - ret = mdmn_ksend_message( - setno, - MD_MN_MSG_TEST2, - flags, - 0, - (char *)&msg_test, - sizeof (msg_test), - result); - - if (ret != 0) { - printf("mdmn_ksend_message failed (%d)\n", ret); - } - } - kmem_free(result, sizeof (md_mn_kresult_t)); - - break; - } -#endif - - case MD_MN_OPEN_TEST: - { - md_clu_open_t *p; - minor_t mnum; - - sz = sizeof (md_clu_open_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sizeof (md_clu_open_t), mode) != 0) { - err = EFAULT; - break; - } - - p = (md_clu_open_t *)d; - mnum = md_getminor(p->clu_dev); - - if ((err = verify_minor(mnum)) != 0) { - if (err == EINVAL) - (void) mdmderror(&p->clu_mde, MDE_INVAL_UNIT, - mnum); - break; - } - err = md_clu_ioctl(p); - break; - } - - case MD_MN_SET_NODEID: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_set_node_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - snp = (mddb_set_node_params_t *)d; - - if (snp->sn_setno >= md_nsets) { - err = EINVAL; - break; - } - - md_set[snp->sn_setno].s_nodeid = snp->sn_nodeid; - - if (md_mn_mynode_id == MD_MN_INVALID_NID) - md_mn_mynode_id = snp->sn_nodeid; -#ifdef DEBUG - else if (md_mn_mynode_id != snp->sn_nodeid) - cmn_err(CE_WARN, "Previously set nodeid 0x%x for this" - "node doesn't match nodeid being set 0x%x\n", - md_mn_mynode_id, snp->sn_nodeid); -#endif /* DEBUG */ - err = 0; - break; - } - case MD_IOCGUNIQMSGID: - { - md_mn_msgid_t msgid; - struct timeval32 tv; - - if (! (mode & FREAD)) - return (EACCES); - - uniqtime32(&tv); - - /* high 32 bits are the seconds */ - msgid.mid_time = (u_longlong_t)tv.tv_sec << 32; - /* low 32 bits are the micro secs */ - msgid.mid_time |= tv.tv_usec; - - msgid.mid_nid = md_mn_mynode_id; - /* - * This is never called for submessages, so we better - * null out the submessage ID - */ - msgid.mid_smid = 0; - - if (ddi_copyout((caddr_t)&msgid, data, sizeof (msgid), mode) - != 0) { - err = EFAULT; - break; - } - break; - } - - /* - * suspend the IO's for a given set number. - * - * If setno = 0 is specified, try operation on all snarfed MN disksets. - * If there are no snarfed MN disksets, then return success. - * - * If a specific set number is given, then return EINVAL if unable - * to perform operation. - */ - case MD_MN_SUSPEND_SET: - { - set_t setno; - int rval = 0; - int i; - - if (! (mode & FWRITE)) - return (EACCES); - - if (ddi_copyin(data, &setno, sizeof (set_t), mode) != 0) { - return (EFAULT); - } - if (setno >= MD_MAXSETS) { - return (EINVAL); - } - - mutex_enter(&md_mx); - if (setno == 0) { - /* if set number is 0, we walk all sets */ - for (i = 1; i <= (MD_MAXSETS - 1); i++) { - if ((md_set[i].s_status & - (MD_SET_SNARFED|MD_SET_MNSET)) == - (MD_SET_SNARFED|MD_SET_MNSET)) { - md_set[i].s_status |= MD_SET_HALTED; - } - } - } else { - /* If unable to halt specified set, set EINVAL */ - if ((md_set[setno].s_status & - (MD_SET_SNARFED|MD_SET_MNSET)) == - (MD_SET_SNARFED|MD_SET_MNSET)) { - md_set[setno].s_status |= MD_SET_HALTED; - } else { - rval = EINVAL; - } - } - mutex_exit(&md_mx); - return (rval); - } - - /* - * resume the IO's for a given set number. - * - * If setno = 0 is specified, try operation on all snarfed MN disksets. - * If there are no snarfed MN disksets, then return success. - * - * If a specific set number is given, then return EINVAL if unable - * to perform operation. - */ - case MD_MN_RESUME_SET: - { - set_t setno; - int resumed_set = 0; - int rval = 0; - int i; - - if (! (mode & FWRITE)) - return (EACCES); - - if (ddi_copyin(data, &setno, sizeof (set_t), mode) != 0) { - return (EFAULT); - } - if (setno >= MD_MAXSETS) { - return (EINVAL); - } - - /* if 0 is specified as the set number, we walk all sets */ - mutex_enter(&md_mx); - if (setno == 0) { - /* if set number is 0, we walk all sets */ - for (i = 1; i <= (MD_MAXSETS - 1); i++) { - if ((md_set[i].s_status & - (MD_SET_SNARFED|MD_SET_MNSET)) == - (MD_SET_SNARFED|MD_SET_MNSET)) { - md_set[i].s_status &= ~MD_SET_HALTED; - resumed_set = 1; - } - } - } else { - /* If unable to resume specified set, set EINVAL */ - if ((md_set[setno].s_status & - (MD_SET_SNARFED|MD_SET_MNSET)) == - (MD_SET_SNARFED|MD_SET_MNSET)) { - md_set[setno].s_status &= ~MD_SET_HALTED; - resumed_set = 1; - } else { - rval = EINVAL; - } - } - - /* - * In case we actually resumed at least one set, - * Inform all threads waiting for this change - */ - if (resumed_set == 1) { - cv_broadcast(&md_cv); - } - - mutex_exit(&md_mx); - return (rval); - } - - case MD_MN_MDDB_PARSE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_parse_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - err = mddb_parse((mddb_parse_parm_t *)d); - break; - - } - - case MD_MN_MDDB_BLOCK: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_block_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - err = mddb_block((mddb_block_parm_t *)d); - break; - - } - - case MD_MN_MDDB_OPTRECFIX: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_optrec_parm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - err = mddb_optrecfix((mddb_optrec_parm_t *)d); - break; - - } - - case MD_MN_CHK_WRT_MDDB: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (mddb_config_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mddb_check_write_ioctl((mddb_config_t *)d); - break; - } - - case MD_MN_SET_SETFLAGS: - case MD_MN_GET_SETFLAGS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (mddb_setflags_config_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mddb_setflags_ioctl((mddb_setflags_config_t *)d); - break; - } - - case MD_MN_COMMD_ERR: - { - md_mn_commd_err_t *cmp; - char *msg; - - sz = sizeof (md_mn_commd_err_t); - d = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - cmp = (md_mn_commd_err_t *)d; - if (cmp->size > MAXPATHLEN) { - err = EINVAL; - break; - } - - msg = (char *)kmem_zalloc(cmp->size + 1, KM_SLEEP); - if (ddi_copyin((caddr_t)(uintptr_t)cmp->md_message, msg, - cmp->size, mode) != 0) { - kmem_free(msg, cmp->size + 1); - err = EFAULT; - break; - } - cmn_err(CE_WARN, "%s\n", msg); - kmem_free(msg, cmp->size + 1); - break; - } - - case MD_IOCMAKE_DEV: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_mkdev_params_t); - - if ((d = kmem_alloc(sz, KM_NOSLEEP)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mkdev_ioctl((md_mkdev_params_t *)d); - break; - } - - case MD_IOCREM_DEV: - { - set_t setno; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (minor_t); - - d = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - /* - * This ioctl is called to cleanup the device name - * space when metainit fails or -n is invoked - * In this case, reclaim the dispatched un slot - */ - setno = MD_MIN2SET(*(minor_t *)d); - if (setno >= md_nsets) { - err = EINVAL; - break; - } else if (md_set[setno].s_un_next <= 0) { - err = EFAULT; - break; - } else { - md_set[setno].s_un_next--; - } - - /* - * Attempt to remove the assocated device node - */ - md_remove_minor_node(*(minor_t *)d); - break; - } - - /* - * Update md_mn_commd_pid global to reflect presence or absence of - * /usr/sbin/rpc.mdcommd. This allows us to determine if an RPC failure - * is expected during a mdmn_ksend_message() handshake. If the commd is - * not present then an RPC failure is acceptable. If the commd _is_ - * present then an RPC failure means we have an inconsistent view across - * the cluster. - */ - case MD_MN_SET_COMMD_RUNNING: - { - if (! (mode & FWRITE)) - return (EACCES); - - md_mn_commd_pid = (pid_t)(intptr_t)data; - err = 0; - break; - } - - case MD_IOCIMP_LOAD: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) { - return (err); - } - - err = md_imp_snarf_set((mddb_config_t *)d); - break; - - } - - case MD_DB_LBINITTIME: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = get_lb_inittime_ioctl((mddb_config_t *)d); - break; - } - case MD_IOCUPDATE_NM_RR_DID: - { - if (! (mode & FWRITE)) - return (EACCES); - - mddb_config_case = 1; - - err = mddb_config_from_user(&d, data, mode, &c_devid_addr, - &c_old_devid_addr); - - if (err) - return (err); - - err = md_update_nm_rr_did_ioctl((mddb_config_t *)d); - break; - } - default: - return (ENOTTY); /* used by next level up */ - } - - /* - * copyout and free any args - */ - if (mddb_config_case) { - err_to_user = mddb_config_to_user(d, data, mode, c_devid_addr, - c_old_devid_addr); - } else if (mddb_didstat_case) { - err_to_user = mddb_didstat_to_user(d, data, mode, ds_ctd_addr); - } else if (sz != 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - kmem_free(d, sz); - } - - if (err) - return (err); - return (err_to_user); -} - -int -md_admin_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp) -{ - md_driver_t drv; - int modindex; - int err; - - /* - * see if we can do this without involving the subdriver - */ - if ((err = md_base_ioctl(dev, cmd, data, mode, lockp)) != ENOTTY) - return (err); - - /* - * see what subdriver we need - */ - if (! ISMDIOC(cmd)) - return (ENOTTY); - - if ((!NODBNEEDED(cmd)) && md_snarf_db_set(MD_LOCAL_SET, NULL) != 0) - return (ENODEV); - - if (ddi_copyin(data, (caddr_t)&drv, sizeof (drv), mode) != 0) - return (EFAULT); - - /* - * load subdriver if not already loaded - */ - if (((modindex = md_getmodindex(&drv, 0, NODBNEEDED(cmd))) == -1) || - (md_ops[modindex]->md_ioctl == NULL)) - return (ENOTTY); - - /* - * dispatch to subdriver - */ - return ((*md_ops[modindex]->md_ioctl)(md_dev64_to_dev(dev), cmd, data, - mode, lockp)); -} - -void -md_get_geom( - md_unit_t *un, - struct dk_geom *gp -) -{ - diskaddr_t tb = un->c.un_total_blocks; - uint_t cylsize = un->c.un_nhead * un->c.un_nsect; - - bzero((caddr_t)gp, sizeof (*gp)); - gp->dkg_nhead = un->c.un_nhead; - gp->dkg_nsect = un->c.un_nsect; - gp->dkg_rpm = un->c.un_rpm; - gp->dkg_write_reinstruct = un->c.un_wr_reinstruct; - gp->dkg_read_reinstruct = un->c.un_rd_reinstruct; - gp->dkg_ncyl = (ushort_t)(tb / cylsize); - if (! (un->c.un_flag & MD_LABELED)) /* skip first cyl */ - gp->dkg_ncyl += 1; - gp->dkg_pcyl = gp->dkg_ncyl; -} - -void -md_get_vtoc(md_unit_t *un, struct vtoc *vtoc) -{ - caddr_t v; - mddb_recstatus_t status; - struct vtoc32 *vt32; - - /* - * Return vtoc structure fields in the provided VTOC area, addressed - * by *vtoc. - * - */ - - if (un->c.un_vtoc_id) { - status = mddb_getrecstatus(un->c.un_vtoc_id); - if (status == MDDB_OK) { - v = mddb_getrecaddr(un->c.un_vtoc_id); - /* if this seems to be a sane vtoc, just copy it ... */ - if (((struct vtoc *)v)->v_sanity == VTOC_SANE) { - bcopy(v, (caddr_t)vtoc, sizeof (struct vtoc)); - } else { - /* ... else assume a vtoc32 was stored here */ - vt32 = (struct vtoc32 *)v; - vtoc32tovtoc((*vt32), (*vtoc)); - } - if (un->c.un_flag & MD_LABELED) - vtoc->v_part[0].p_start = 0ULL; - else - vtoc->v_part[0].p_start = (diskaddr_t) - (un->c.un_nhead * un->c.un_nsect); - vtoc->v_part[0].p_size = un->c.un_total_blocks; - vtoc->v_version = V_VERSION; - vtoc->v_sectorsz = DEV_BSIZE; - return; - } - - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - } - - bzero((caddr_t)vtoc, sizeof (struct vtoc)); - vtoc->v_sanity = VTOC_SANE; - vtoc->v_nparts = 1; - vtoc->v_version = V_VERSION; - vtoc->v_sectorsz = DEV_BSIZE; - if (un->c.un_flag & MD_LABELED) - vtoc->v_part[0].p_start = 0ULL; - else - vtoc->v_part[0].p_start = (diskaddr_t)(un->c.un_nhead * - un->c.un_nsect); - vtoc->v_part[0].p_size = un->c.un_total_blocks; -} - -int -md_set_vtoc(md_unit_t *un, struct vtoc *vtoc) -{ - - struct partition *vpart; - int i; - mddb_recid_t recid; - mddb_recid_t recids[3]; - mddb_recstatus_t status; - caddr_t v; - diskaddr_t sb; - - /* - * Sanity-check the vtoc - */ - if (vtoc->v_sanity != VTOC_SANE || vtoc->v_nparts != 1) - return (EINVAL); - - /* don't allow to create a vtoc for a big metadevice */ - if (un->c.un_revision & MD_64BIT_META_DEV) - return (ENOTSUP); - /* - * Validate the partition table - */ - vpart = vtoc->v_part; - for (i = 0; i < V_NUMPAR; i++, vpart++) { - if (i == 0) { - if (un->c.un_flag & MD_LABELED) - sb = 0ULL; - else - sb = (diskaddr_t)(un->c.un_nhead * - un->c.un_nsect); - if (vpart->p_start != sb) - return (EINVAL); - if (vpart->p_size != un->c.un_total_blocks) - return (EINVAL); - continue; - } - /* all other partitions must be zero */ - if (vpart->p_start != 0ULL) - return (EINVAL); - if (vpart->p_size != 0ULL) - return (EINVAL); - } - - if (un->c.un_vtoc_id) { - recid = un->c.un_vtoc_id; - status = mddb_getrecstatus(recid); - if (status == MDDB_OK) { - /* - * If there's enough space in the record, and the - * existing record is a vtoc record (not EFI), - * we just can use the existing space. - * Otherwise, we create a new MDDB_VTOC record for - * this unit. - */ - if ((mddb_getrecsize(recid) >= sizeof (struct vtoc)) && - ((un->c.un_flag & MD_EFILABEL) == 0)) { - v = mddb_getrecaddr(recid); - bcopy((caddr_t)vtoc, v, sizeof (struct vtoc)); - mddb_commitrec_wrapper(recid); - recids[0] = recid; - recids[1] = un->c.un_record_id; - recids[2] = 0; - un->c.un_flag &= ~MD_EFILABEL; - mddb_commitrecs_wrapper(recids); - return (0); - } - - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - mddb_deleterec_wrapper(recid); - } - } - - recid = mddb_createrec(sizeof (struct vtoc), MDDB_VTOC, 0, - MD_CRO_32BIT, MD_UN2SET(un)); - - if (recid < 0) { - return (ENOSPC); - } - - recids[0] = recid; - recids[1] = un->c.un_record_id; - recids[2] = 0; - v = mddb_getrecaddr(recid); - bcopy((caddr_t)vtoc, v, sizeof (struct vtoc)); - - un->c.un_vtoc_id = recid; - un->c.un_flag &= ~MD_EFILABEL; - mddb_commitrecs_wrapper(recids); - return (0); -} - -void -md_get_extvtoc(md_unit_t *un, struct extvtoc *extvtoc) -{ - caddr_t v; - mddb_recstatus_t status; - struct vtoc32 *vt32; - struct vtoc *vtoc; - - /* - * Return extvtoc structure fields in the provided VTOC area, addressed - * by *extvtoc. - * - */ - - bzero((caddr_t)extvtoc, sizeof (struct extvtoc)); - if (un->c.un_vtoc_id) { - status = mddb_getrecstatus(un->c.un_vtoc_id); - if (status == MDDB_OK) { - v = mddb_getrecaddr(un->c.un_vtoc_id); - if (un->c.un_flag & MD_EFILABEL) { - bcopy(v, (caddr_t)&(extvtoc->v_volume), - LEN_DKL_VVOL); - } else { - /* - * if this seems to be a sane vtoc, - * just copy it ... - */ - if (((struct vtoc *)v)->v_sanity == VTOC_SANE) { - vtoc = (struct vtoc *)v; - vtoctoextvtoc((*vtoc), (*extvtoc)); - } else { - /* assume a vtoc32 was stored here */ - vt32 = (struct vtoc32 *)v; - vtoc32toextvtoc((*vt32), (*extvtoc)); - } - } - } else { - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - } - } - - extvtoc->v_sanity = VTOC_SANE; - extvtoc->v_nparts = 1; - extvtoc->v_version = V_VERSION; - extvtoc->v_sectorsz = DEV_BSIZE; - if (un->c.un_flag & MD_LABELED) - extvtoc->v_part[0].p_start = 0ULL; - else - extvtoc->v_part[0].p_start = (diskaddr_t)(un->c.un_nhead * - un->c.un_nsect); - extvtoc->v_part[0].p_size = un->c.un_total_blocks; -} - -int -md_set_extvtoc(md_unit_t *un, struct extvtoc *extvtoc) -{ - - struct extpartition *vpart; - int i; - mddb_recid_t recid; - mddb_recid_t recids[3]; - mddb_recstatus_t status; - caddr_t v; - diskaddr_t sb; - struct vtoc vtoc; - - /* - * Sanity-check the vtoc - */ - if (extvtoc->v_sanity != VTOC_SANE || extvtoc->v_nparts != 1) - return (EINVAL); - - /* - * Validate the partition table - */ - vpart = extvtoc->v_part; - for (i = 0; i < V_NUMPAR; i++, vpart++) { - if (i == 0) { - if (un->c.un_flag & MD_LABELED) - sb = 0ULL; - else - sb = (diskaddr_t)(un->c.un_nhead * - un->c.un_nsect); - if (vpart->p_start != sb) - return (EINVAL); - if (vpart->p_size != un->c.un_total_blocks) - return (EINVAL); - continue; - } - /* all other partitions must be zero */ - if (vpart->p_start != 0ULL) - return (EINVAL); - if (vpart->p_size != 0) - return (EINVAL); - } - - if (!(un->c.un_revision & MD_64BIT_META_DEV)) { - extvtoctovtoc((*extvtoc), (vtoc)); - return (md_set_vtoc(un, &vtoc)); - } - - /* - * Since the size is greater than 1 TB the information can either - * be stored as a VTOC or EFI. Since EFI uses less space just use - * it. md_get_extvtoc can reconstruct the label information from - * either format. - */ - if (un->c.un_vtoc_id) { - recid = un->c.un_vtoc_id; - status = mddb_getrecstatus(recid); - if (status == MDDB_OK) { - /* - * If there's enough space in the record, and the - * existing record is an EFI record (not vtoc), - * we just can use the existing space. - * Otherwise, we create a new MDDB_EFILABEL record for - * this unit. - */ - if ((mddb_getrecsize(recid) >= MD_EFI_PARTNAME_BYTES) && - (un->c.un_flag & MD_EFILABEL)) { - v = mddb_getrecaddr(recid); - bzero((caddr_t)v, MD_EFI_PARTNAME_BYTES); - bcopy((caddr_t)&(extvtoc->v_volume), - v, LEN_DKL_VVOL); - mddb_commitrec_wrapper(recid); - return (0); - } - - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - mddb_deleterec_wrapper(recid); - } - } - - recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0, - MD_CRO_32BIT, MD_UN2SET(un)); - - if (recid < 0) { - return (ENOSPC); - } - - recids[0] = recid; - recids[1] = un->c.un_record_id; - recids[2] = 0; - v = mddb_getrecaddr(recid); - bzero((caddr_t)v, MD_EFI_PARTNAME_BYTES); - bcopy((caddr_t)&(extvtoc->v_volume), v, LEN_DKL_VVOL); - - un->c.un_vtoc_id = recid; - un->c.un_flag |= MD_EFILABEL; - mddb_commitrecs_wrapper(recids); - return (0); -} - - -void -md_get_cgapart(md_unit_t *un, struct dk_map *dkmapp) -{ - - /* skip the first cyl */ - dkmapp->dkl_cylno = 1; - - dkmapp->dkl_nblk = (daddr_t)un->c.un_total_blocks; -} - -static struct uuid md_efi_reserved = EFI_RESERVED; - -/* - * md_get_efi - * INPUT: - * un; the md_unit - * buf; the buffer that is preallocated by the calling routine and - * capable of taking the EFI label for this unit - * OUTPUT: - * A filled buffer, containing one struct efi_gpt followed by one - * struct efi_gpe, because a md efi only has one valid partition - * We fetch that date either from the mddb (like vtoc) - * or we a fake an EFI label. - * - * NOTES: - * We do not provide for any global unique identifiers, - * We also use the field c.un_vtoc_id, as the semantic is very similar - * When we are called, it's already checked, that this unit has an EFI - * label and not a vtoc - */ - -void -md_get_efi(md_unit_t *un, char *buf) -{ - caddr_t v; - efi_gpt_t *efi_header = (efi_gpt_t *)buf; - efi_gpe_t *efi_part = (efi_gpe_t *)(buf + sizeof (efi_gpt_t)); - mddb_recstatus_t status; - - /* first comes the header */ - efi_header->efi_gpt_Signature = LE_64(EFI_SIGNATURE); - efi_header->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); - efi_header->efi_gpt_NumberOfPartitionEntries = LE_32(1); - efi_header->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); - efi_header->efi_gpt_LastUsableLBA = LE_64(un->c.un_total_blocks - 1); - efi_header->efi_gpt_FirstUsableLBA = 0; - efi_header->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); - - /* - * We don't fill out any of these: - * - * efi_header->efi_gpt_HeaderCRC32; - * efi_header->efi_gpt_DiskGUID; - * efi_header->efi_gpt_PartitionEntryArrayCRC32; - * efi_header->efi_gpt_Reserved1; - * efi_header->efi_gpt_MyLBA; - * efi_header->efi_gpt_AlternateLBA; - * efi_header->efi_gpt_Reserved2[LEN_EFI_PAD]; - * efi_header->efi_gpt_PartitionEntryLBA; - */ - - /* - * We copy back one partition, of type reserved, - * which may contain the name of the metadevice - * (this is what was used to be v_volume for a vtoc device) - * if no name is stored in the vtoc record, we hand an empty name - * to the user - */ - - UUID_LE_CONVERT(efi_part->efi_gpe_PartitionTypeGUID, md_efi_reserved); - if (un->c.un_flag & MD_LABELED) - efi_part->efi_gpe_StartingLBA = LE_64(1ULL); - else - efi_part->efi_gpe_StartingLBA = 0; - - efi_part->efi_gpe_EndingLBA = LE_64(un->c.un_total_blocks - 1); - - if (un->c.un_vtoc_id) { - status = mddb_getrecstatus(un->c.un_vtoc_id); - if (status == MDDB_OK) { - v = mddb_getrecaddr(un->c.un_vtoc_id); - bcopy(v, (caddr_t)&(efi_part->efi_gpe_PartitionName), - MD_EFI_PARTNAME_BYTES); - return; - } - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - } - - /* - * We don't fill out any of these - * efi_part->efi_gpe_UniquePartitionGUID - * efi_part->efi_gpe_Attributes - */ -} - - -/* - * md_set_efi - * INPUT: - * un; a md_unit - * buf; a buffer that is holding an EFI label for this unit - * - * PURPOSE: - * Perform some sanity checks on the EFI label provided, - * Then store efi_gpe_PartitionName in the mddb - * and link the unit's c.un_vtoc_id field to it. - * - * RETURN: - * EINVAL if any of the sanity checks fail - * 0 on succes - * - * NOTES: - * We do not provide for any global unique identifiers, - * We also use the field c.un_vtoc_id, as the semantic is very similar - * When we are called, it's already checked, that this unit has an EFI - * label and not a vtoc - */ - - -int -md_set_efi(md_unit_t *un, char *buf) -{ - - mddb_recid_t recid; - mddb_recid_t recids[3]; - mddb_recstatus_t status; - caddr_t v; - efi_gpt_t *efi_header = (efi_gpt_t *)buf; - efi_gpe_t *efi_part = (efi_gpe_t *)(buf + sizeof (efi_gpt_t)); - struct uuid md_efi_reserved_le; - - /* - * Sanity-check the EFI label - */ - if ((efi_header->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) || - (efi_header->efi_gpt_NumberOfPartitionEntries != LE_32(1))) - return (EINVAL); - - UUID_LE_CONVERT(md_efi_reserved_le, md_efi_reserved); - - /* - * Validate the partition - */ - if (efi_part->efi_gpe_StartingLBA != 0 || - efi_part->efi_gpe_EndingLBA != LE_64(un->c.un_total_blocks - 1) || - bcmp(&efi_part->efi_gpe_PartitionTypeGUID, &md_efi_reserved_le, - sizeof (struct uuid))) { - return (EINVAL); - } - /* - * If no name is specified, we have nothing to do and return success. - * because efi_gpe_PartitionName is in unicode form, we have to - * check the first two bytes of efi_gpe_PartitionName. - */ - if (((char *)(uintptr_t)efi_part->efi_gpe_PartitionName[0] == NULL) && - ((char *)(uintptr_t)efi_part->efi_gpe_PartitionName[1] == NULL)) { - return (0); - } - - if (un->c.un_vtoc_id) { - recid = un->c.un_vtoc_id; - status = mddb_getrecstatus(recid); - if (status == MDDB_OK) { - /* - * If there's enough space in the record, and the - * existing record is an EFI record (not vtoc), - * we just can use the existing space. - * Otherwise, we create a new MDDB_EFILABEL record for - * this unit. - */ - if ((mddb_getrecsize(recid) >= MD_EFI_PARTNAME_BYTES) && - (un->c.un_flag & MD_EFILABEL)) { - v = mddb_getrecaddr(recid); - bcopy((caddr_t)&efi_part->efi_gpe_PartitionName, - v, MD_EFI_PARTNAME_BYTES); - mddb_commitrec_wrapper(recid); - return (0); - } - - un->c.un_vtoc_id = 0; - mddb_commitrec_wrapper(un->c.un_record_id); - mddb_deleterec_wrapper(recid); - } - } - - recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0, - MD_CRO_32BIT, MD_UN2SET(un)); - - if (recid < 0) { - return (ENOSPC); - } - - recids[0] = recid; - recids[1] = un->c.un_record_id; - recids[2] = 0; - v = mddb_getrecaddr(recid); - bcopy((caddr_t)&efi_part->efi_gpe_PartitionName, v, - MD_EFI_PARTNAME_BYTES); - - un->c.un_vtoc_id = recid; - un->c.un_flag |= MD_EFILABEL; - mddb_commitrecs_wrapper(recids); - return (0); -} - -int -md_dkiocgetefi(minor_t mnum, void *data, int mode) -{ - dk_efi_t efi; - caddr_t *buf; - int rval = 0; - mdi_unit_t *ui; - md_unit_t *mdun; - - if (!(mode & FREAD)) - return (EACCES); - - if (ddi_copyin(data, &efi, sizeof (dk_efi_t), mode)) - return (EFAULT); - - efi.dki_data = (void *)(uintptr_t)efi.dki_data_64; - - /* - * If the user specified a zero length or a null pointer, we give them - * the number of bytes to alloc in user land. - */ - if (efi.dki_length == 0 || efi.dki_data == NULL) { - efi.dki_length = MD_EFI_LABEL_SIZE; - if (ddi_copyout(&efi, data, sizeof (dk_efi_t), mode)) - return (EFAULT); - return (0); - } - /* Bad size specified, better not answer to that query */ - if (efi.dki_length < MD_EFI_LABEL_SIZE) - return (EINVAL); - - if ((ui = MDI_UNIT(mnum)) == NULL) - return (ENXIO); - - /* - * We don't want to allocate as much bytes as we are told, - * because we know the good size is MD_EFI_LABEL_SIZE - */ - efi.dki_length = MD_EFI_LABEL_SIZE; - buf = kmem_zalloc(MD_EFI_LABEL_SIZE, KM_SLEEP); - - mdun = (md_unit_t *)md_unit_readerlock(ui); - md_get_efi(mdun, (char *)buf); - md_unit_readerexit(ui); - - if (ddi_copyout(buf, efi.dki_data, efi.dki_length, mode)) - rval = EFAULT; - - kmem_free(buf, MD_EFI_LABEL_SIZE); - return (rval); -} - -int -md_dkiocsetefi(minor_t mnum, void *data, int mode) -{ - dk_efi_t efi; - caddr_t *buf; - int rval = 0; - mdi_unit_t *ui; - md_unit_t *mdun; - - if (!(mode & FREAD)) - return (EACCES); - - if ((ui = MDI_UNIT(mnum)) == NULL) - return (ENXIO); - - if (ddi_copyin(data, &efi, sizeof (dk_efi_t), mode)) - return (EFAULT); - - efi.dki_data = (void *)(uintptr_t)efi.dki_data_64; - - /* Sanity check of the skeleton */ - if ((efi.dki_length > sizeof (efi_gpt_t) + EFI_MIN_ARRAY_SIZE) || - (efi.dki_length < sizeof (efi_gpt_t) + sizeof (efi_gpe_t)) || - (efi.dki_data == NULL)) - return (EINVAL); - - /* - * It's only a real EFI label if the location is 1 - * in all other cases, we do nothing but say we did. - */ - if (efi.dki_lba != 1) - return (0); /* success */ - - buf = kmem_alloc(efi.dki_length, KM_SLEEP); - /* And here we copy in the real data */ - if (ddi_copyin(efi.dki_data, buf, efi.dki_length, mode)) { - rval = EFAULT; - } else { - mdun = (md_unit_t *)md_unit_readerlock(ui); - rval = md_set_efi(mdun, (char *)buf); - md_unit_readerexit(ui); - } - - kmem_free(buf, efi.dki_length); - return (rval); -} - -/* - * md_dkiocpartition() - * Return the appropriate partition64 structure for a given metadevice. - * - * Actually the only real information being returned is the number of blocks - * of the specified metadevice. - * The starting block is always 0, and so is the partition number, because - * metadevices don't have slices. - * - * This function is generic for all types of metadevices. - */ -int -md_dkiocpartition(minor_t mnum, void *data, int mode) -{ - struct partition64 p64; - mdi_unit_t *ui; - md_unit_t *un; - int rval = 0; - - if (!(mode & FREAD)) - return (EACCES); - - - if ((ui = MDI_UNIT(mnum)) == NULL) - return (ENXIO); - - if (ddi_copyin(data, &p64, sizeof (struct partition64), mode)) - return (EFAULT); - - if (p64.p_partno != 0) - return (ESRCH); - - un = (md_unit_t *)md_unit_readerlock(ui); - /* All metadevices share the same PartitionTypeGUID (see md_get_efi) */ - UUID_LE_CONVERT(p64.p_type, md_efi_reserved); - - p64.p_partno = 0; - p64.p_start = 0; - p64.p_size = un->c.un_total_blocks; - md_unit_readerexit(ui); - - if (ddi_copyout(&p64, data, sizeof (struct partition64), mode)) { - rval = EFAULT; - } - - return (rval); -} - - -/* - * Remove device node - */ -void -md_remove_minor_node(minor_t mnum) -{ - char name[16]; - extern dev_info_t *md_devinfo; - - /* - * Attempt release of its minor node - */ - (void) snprintf(name, sizeof (name), "%d,%d,blk", MD_MIN2SET(mnum), - MD_MIN2UNIT(mnum)); - ddi_remove_minor_node(md_devinfo, name); - - (void) snprintf(name, sizeof (name), "%d,%d,raw", MD_MIN2SET(mnum), - MD_MIN2UNIT(mnum)); - ddi_remove_minor_node(md_devinfo, name); -} diff --git a/usr/src/uts/common/io/lvm/md/md_mddb.c b/usr/src/uts/common/io/lvm/md/md_mddb.c deleted file mode 100644 index fd373a87521e..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_mddb.c +++ /dev/null @@ -1,12963 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -mhd_mhiargs_t defmhiargs = { - 1000, - { 6000, 6000, 30000 } -}; - -#define MDDB - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern char svm_bootpath[]; - -int md_maxbootlist = MAXBOOTLIST; -static ulong_t mddb_maxblocks = 0; /* tune for small records */ -static int mddb_maxbufheaders = 50; -static uint_t mddb_maxcopies = MDDB_NLB; - -/* - * If this is set, more detailed messages about DB init will be given, instead - * of just the MDE_DB_NODB. - */ -static int mddb_db_err_detail = 0; - -/* - * This lock is used to single-thread load/unload of all sets - */ -static kmutex_t mddb_lock; - -/* - * You really do NOT want to change this boolean. - * It can be VERY dangerous to do so. Loss of - * data may occur. USE AT YOUR OWN RISK!!!! - */ -static int mddb_allow_half = 0; -/* - * For mirrored root allow reboot with only half the replicas available - * Flag inserted for Santa Fe project. - */ -int mirrored_root_flag; - -#define ISWHITE(c) (((c) == ' ') || ((c) == '\t') || \ - ((c) == '\r') || ((c) == '\n')) -#define ISNUM(c) (((c) >= '0') && ((c) <= '9')) - -#define SETMUTEX(setno) (&md_set[setno].s_dbmx) - -extern md_krwlock_t md_unit_array_rw; /* md.c */ -extern set_t md_nsets; /* md.c */ -extern int md_nmedh; /* md.c */ -extern md_set_t md_set[]; /* md.c */ -extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*); -extern dev_info_t *md_devinfo; -extern int md_init_debug; -extern int md_status; -extern md_ops_t *md_opslist; -extern md_krwlock_t nm_lock; - -static int update_locatorblock(mddb_set_t *s, md_dev64_t dev, - ddi_devid_t didptr, ddi_devid_t old_didptr); - -/* - * Defines for crc calculation for records - * rec_crcgen generates a crc checksum for a record block - * rec_crcchk checks the crc checksum for a record block - */ -#define REC_CRCGEN 0 -#define REC_CRCCHK 1 -#define rec_crcgen(s, dep, rbp) \ - (void) rec_crcfunc(s, dep, rbp, REC_CRCGEN) -#define rec_crcchk(s, dep, rbp) \ - rec_crcfunc(s, dep, rbp, REC_CRCCHK) - -/* - * During upgrade, SVM basically runs with the devt from the target - * being upgraded. Translations are made from the target devt to the - * miniroot devt when writing data out to the disk. This is done by - * the following routines: - * wrtblklst - * writeblks - * readblklst - * readblks - * dt_read - * - * The following routines are used by the routines listed above and - * expect a translated (aka miniroot) devt: - * getblks - * getmasters - * - * Also, when calling any system routines, such as ddi_lyr_get_devid, - * the translated (aka miniroot) devt must be used. - * - * By the same token, the major number and major name conversion operations - * need to use the name_to_major file from the target system instead - * of the name_to_major file on the miniroot. So, calls to - * ddi_name_to_major must be replaced with calls to md_targ_name_to_major - * when running on an upgrade. Same is true with calls to - * ddi_major_to_name. - */ - - -#ifndef MDDB_FAKE - -static int -mddb_rwdata( - mddb_set_t *s, /* incore db set structure */ - int flag, /* B_ASYNC, B_FAILFAST or 0 passed in here */ - buf_t *bp -) -{ - int err = 0; - - bp->b_flags = (flag | B_BUSY) & (~B_ASYNC); - - mutex_exit(SETMUTEX(s->s_setno)); - if (mdv_strategy_tstpnt == NULL || - (*mdv_strategy_tstpnt)(bp, 0, NULL) == 0) - (void) bdev_strategy(bp); - - if (flag & B_ASYNC) { - mutex_enter(SETMUTEX(s->s_setno)); - return (0); - } - - err = biowait(bp); - mutex_enter(SETMUTEX(s->s_setno)); - return (err); -} - -static void -setidentifier( - mddb_set_t *s, - identifier_t *ident -) -{ - if (s->s_setno == MD_LOCAL_SET) - (void) strcpy(&ident->serial[0], s->s_ident.serial); - else - ident->createtime = s->s_ident.createtime; -} - -static int -cmpidentifier( - mddb_set_t *s, - identifier_t *ident -) -{ - if (s->s_setno == MD_LOCAL_SET) - return (strcmp(ident->serial, s->s_ident.serial)); - else - return (timercmp(&ident->createtime, - /*CSTYLED*/ - &s->s_ident.createtime, !=)); -} - -static int -mddb_devopen( - md_dev64_t dev -) -{ - dev_t ddi_dev = md_dev64_to_dev(dev); - - if (dev_lopen(&ddi_dev, FREAD|FWRITE, OTYP_LYR, kcred) == 0) - return (0); - return (1); -} - -static void -mddb_devclose( - md_dev64_t dev -) -{ - (void) dev_lclose(md_dev64_to_dev(dev), FREAD|FWRITE, OTYP_LYR, kcred); -} - -/* - * stripe_skip_ts - * - * Returns a list of fields to be skipped in the stripe record structure. - * These fields are ms_timestamp in the component structure. - * Used to skip these fields when calculating the checksum. - */ -static crc_skip_t * -stripe_skip_ts(void *un, uint_t revision) -{ - struct ms_row32_od *small_mdr; - struct ms_row *big_mdr; - uint_t row, comp, ncomps, compoff; - crc_skip_t *skip; - crc_skip_t *skip_prev; - crc_skip_t skip_start = {0, 0, 0}; - ms_unit_t *big_un; - ms_unit32_od_t *small_un; - uint_t rb_off = offsetof(mddb_rb32_t, rb_data[0]); - - switch (revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - small_un = (ms_unit32_od_t *)un; - skip_prev = &skip_start; - - if (small_un->un_nrows == 0) - return (NULL); - /* - * walk through all rows to find the total number - * of components - */ - small_mdr = &small_un->un_row[0]; - ncomps = 0; - for (row = 0; (row < small_un->un_nrows); row++) { - ncomps += small_mdr[row].un_ncomp; - } - - /* Now walk through the components */ - compoff = small_un->un_ocomp + rb_off; - for (comp = 0; (comp < ncomps); ++comp) { - uint_t mdcp = compoff + - (comp * sizeof (ms_comp32_od_t)); - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), - KM_SLEEP); - skip->skip_offset = mdcp + - offsetof(ms_comp32_od_t, un_mirror.ms_timestamp); - skip->skip_size = sizeof (md_timeval32_t); - skip_prev->skip_next = skip; - skip_prev = skip; - } - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - big_un = (ms_unit_t *)un; - skip_prev = &skip_start; - - if (big_un->un_nrows == 0) - return (NULL); - /* - * walk through all rows to find the total number - * of components - */ - big_mdr = &big_un->un_row[0]; - ncomps = 0; - for (row = 0; (row < big_un->un_nrows); row++) { - ncomps += big_mdr[row].un_ncomp; - } - - /* Now walk through the components */ - compoff = big_un->un_ocomp + rb_off; - for (comp = 0; (comp < ncomps); ++comp) { - uint_t mdcp = compoff + - (comp * sizeof (ms_comp_t)); - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), - KM_SLEEP); - skip->skip_offset = mdcp + - offsetof(ms_comp_t, un_mirror.ms_timestamp); - skip->skip_size = sizeof (md_timeval32_t); - skip_prev->skip_next = skip; - skip_prev = skip; - } - break; - } - /* Return the start of the list of fields to skip */ - return (skip_start.skip_next); -} - -/* - * mirror_skip_ts - * - * Returns a list of fields to be skipped in the mirror record structure. - * This includes un_last_read and sm_timestamp for each submirror - * Used to skip these fields when calculating the checksum. - */ -static crc_skip_t * -mirror_skip_ts(uint_t revision) -{ - int i; - crc_skip_t *skip; - crc_skip_t *skip_prev; - crc_skip_t skip_start = {0, 0, 0}; - uint_t rb_off = offsetof(mddb_rb32_t, rb_data[0]); - - skip_prev = &skip_start; - - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), KM_SLEEP); - switch (revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - skip->skip_offset = offsetof(mm_unit32_od_t, - un_last_read) + rb_off; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - skip->skip_offset = offsetof(mm_unit_t, - un_last_read) + rb_off; - break; - } - skip->skip_size = sizeof (int); - skip_prev->skip_next = skip; - skip_prev = skip; - - for (i = 0; i < NMIRROR; i++) { - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), KM_SLEEP); - switch (revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - skip->skip_offset = offsetof(mm_unit32_od_t, - un_sm[i].sm_timestamp) + rb_off; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - skip->skip_offset = offsetof(mm_unit_t, - un_sm[i].sm_timestamp) + rb_off; - break; - } - skip->skip_size = sizeof (md_timeval32_t); - skip_prev->skip_next = skip; - skip_prev = skip; - } - /* Return the start of the list of fields to skip */ - return (skip_start.skip_next); -} - -/* - * hotspare_skip_ts - * - * Returns a list of the timestamp fields in the hotspare record structure. - * Used to skip these fields when calculating the checksum. - */ -static crc_skip_t * -hotspare_skip_ts(uint_t revision) -{ - crc_skip_t *skip; - uint_t rb_off = offsetof(mddb_rb32_t, rb_data[0]); - - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), KM_SLEEP); - switch (revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - skip->skip_offset = offsetof(hot_spare32_od_t, hs_timestamp) + - rb_off; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - skip->skip_offset = offsetof(hot_spare_t, hs_timestamp) + - rb_off; - break; - } - skip->skip_size = sizeof (md_timeval32_t); - return (skip); -} - -/* - * rec_crcfunc - * - * Calculate or check the checksum for a record - * Calculate the crc if check == 0, Check the crc if check == 1 - * - * Record block may be written by different nodes in a multi-owner diskset - * (in case of master change), the function rec_crcchk excludes timestamp - * fields in crc computation of record data. - * Otherwise, timestamp fields will cause each node to have a different - * checksum for same record block causing the exclusive-or of all record block - * checksums and data block record sums to be non-zero after new master writes - * at least one record block. - */ -static uint_t -rec_crcfunc( - mddb_set_t *s, - mddb_de_ic_t *dep, - mddb_rb32_t *rbp, - int check -) -{ - crc_skip_t *skip; - crc_skip_t *skip_tail; - mddb_type_t type = dep->de_type1; - uint_t ret; - - /* - * Generate a list of the areas to be skipped when calculating - * the checksum. - * First skip rb_checksum, rb_private and rb_userdata. - */ - skip = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), KM_SLEEP); - skip->skip_offset = offsetof(mddb_rb32_t, rb_checksum_fiddle); - skip->skip_size = 3 * sizeof (uint_t); - skip_tail = skip; - if (MD_MNSET_SETNO(s->s_setno)) { - /* For a MN set, skip rb_timestamp */ - skip_tail = (crc_skip_t *)kmem_zalloc(sizeof (crc_skip_t), - KM_SLEEP); - skip_tail->skip_offset = offsetof(mddb_rb32_t, rb_timestamp); - skip_tail->skip_size = sizeof (md_timeval32_t); - skip->skip_next = skip_tail; - - /* Now add a list of timestamps to be skipped */ - if (type >= MDDB_FIRST_MODID) { - switch (dep->de_flags) { - case MDDB_F_STRIPE: - skip_tail->skip_next = - stripe_skip_ts((void *)rbp->rb_data, - rbp->rb_revision); - break; - case MDDB_F_MIRROR: - skip_tail->skip_next = - mirror_skip_ts(rbp->rb_revision); - break; - case MDDB_F_HOTSPARE: - skip_tail->skip_next = - hotspare_skip_ts(rbp->rb_revision); - break; - default: - break; - } - } - } - - if (check) { - ret = crcchk(rbp, &rbp->rb_checksum, dep->de_recsize, skip); - } else { - crcgen(rbp, &rbp->rb_checksum, dep->de_recsize, skip); - ret = rbp->rb_checksum; - } - while (skip) { - crc_skip_t *skip_save = skip; - - skip = skip->skip_next; - kmem_free(skip_save, sizeof (crc_skip_t)); - } - return (ret); -} - -static mddb_bf_t * -allocbuffer( - mddb_set_t *s, - int sleepflag -) -{ - mddb_bf_t *bfp; - - while ((bfp = s->s_freebufhead) == NULL) { - if (sleepflag == MDDB_NOSLEEP) - return ((mddb_bf_t *)NULL); - ++s->s_bufmisses; -#ifdef DEBUG - if (s->s_bufmisses == 1) - cmn_err(CE_NOTE, - "md: mddb: set %u sleeping for buffer", s->s_setno); -#endif - s->s_bufwakeup = 1; - cv_wait(&s->s_buf_cv, SETMUTEX(s->s_setno)); - } - s->s_freebufhead = bfp->bf_next; - bzero((caddr_t)bfp, sizeof (*bfp)); - bfp->bf_buf.b_back = bfp->bf_buf.b_forw = &bfp->bf_buf; - bfp->bf_buf.b_flags = B_BUSY; /* initialize flags */ - return (bfp); -} - -static void -freebuffer( - mddb_set_t *s, - mddb_bf_t *bfp -) -{ - bfp->bf_next = s->s_freebufhead; - s->s_freebufhead = bfp; - if (s->s_bufwakeup) { - cv_broadcast(&s->s_buf_cv); - s->s_bufwakeup = 0; - } -} - - -static void -blkbusy( - mddb_set_t *s, - mddb_block_t blk -) -{ - int bit, byte; - - s->s_freeblkcnt--; - byte = blk / 8; - bit = 1 << (blk & 7); - ASSERT(! (s->s_freebitmap[byte] & bit)); - s->s_freebitmap[byte] |= bit; -} - -static void -blkfree( - mddb_set_t *s, - mddb_block_t blk -) -{ - int bit, byte; - - s->s_freeblkcnt++; - byte = blk / 8; - bit = 1 << (blk & 7); - ASSERT(s->s_freebitmap[byte] & bit); - s->s_freebitmap[byte] &= ~bit; -} - -static int -blkcheck( - mddb_set_t *s, - mddb_block_t blk -) -{ - int bit, byte; - - byte = blk / 8; - bit = 1 << (blk & 7); - return (s->s_freebitmap[byte] & bit); -} - -/* - * not fast but simple - */ -static mddb_block_t -getfreeblks( - mddb_set_t *s, - size_t count -) -{ - int i; - size_t contig; - - contig = 0; - for (i = 0; i < s->s_totalblkcnt; i++) { - if (blkcheck(s, i)) { - contig = 0; - } else { - contig++; - if (contig == count) { - contig = i - count + 1; - for (i = (int)contig; i < contig + count; i++) - blkbusy(s, i); - return ((mddb_block_t)contig); - } - } - } - return (0); -} - -static void -computefreeblks( - mddb_set_t *s -) -{ - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int i; - int minblks; - int freeblks; - mddb_mb_ic_t *mbip; - mddb_lb_t *lbp; - mddb_block_t maxblk; - mddb_did_db_t *did_dbp; - int nblks; - - minblks = 0; - lbp = s->s_lbp; - maxblk = 0; - - /* - * Determine the max number of blocks. - */ - nblks = (lbp->lb_flags & MDDB_MNSET) ? MDDB_MN_MAXBLKS : MDDB_MAXBLKS; - /* - * go through and find highest logical block - */ - for (dbp = s->s_dbp; dbp != 0; dbp = dbp->db_next) { - if (dbp->db_blknum > maxblk) - maxblk = dbp->db_blknum; - for (dep = dbp->db_firstentry; dep != 0; dep = dep->de_next) - for (i = 0; i < dep->de_blkcount; i++) - if (dep->de_blks[i] > maxblk) - maxblk = dep->de_blks[i]; - } - - for (i = 0; i < lbp->lb_loccnt; i++) { - mddb_locator_t *lp = &lbp->lb_locators[i]; - - if ((lp->l_flags & MDDB_F_DELETED) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - freeblks = 0; - for (mbip = s->s_mbiarray[i]; mbip != NULL; - mbip = mbip->mbi_next) { - freeblks += mbip->mbi_mddb_mb.mb_blkcnt; - } - if (freeblks == 0) /* this happen when there is no */ - continue; /* master blk */ - - if (freeblks <= maxblk) { - lp->l_flags |= MDDB_F_TOOSMALL; - lp->l_flags &= ~MDDB_F_ACTIVE; - } - - if (freeblks < minblks || minblks == 0) - minblks = freeblks; - } - /* - * set up reasonable freespace if no - * data bases exist - */ - if (minblks == 0) - minblks = 100; - if (minblks > nblks) - minblks = nblks; - s->s_freeblkcnt = minblks; - s->s_totalblkcnt = minblks; - if (! s->s_freebitmapsize) { - s->s_freebitmapsize = nblks / 8; - s->s_freebitmap = (uchar_t *)kmem_zalloc(s->s_freebitmapsize, - KM_SLEEP); - } - bzero((caddr_t)s->s_freebitmap, s->s_freebitmapsize); - - /* locator block sectors */ - for (i = 0; i < s->s_lbp->lb_blkcnt; i++) - blkbusy(s, i); - - /* locator name sectors */ - for (i = 0; i < s->s_lbp->lb_lnblkcnt; i++) - blkbusy(s, (s->s_lbp->lb_lnfirstblk + i)); - - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - /* locator block device id information */ - for (i = 0; i < s->s_lbp->lb_didblkcnt; i++) - blkbusy(s, (s->s_lbp->lb_didfirstblk + i)); - - /* disk blocks containing actual device ids */ - did_dbp = s->s_did_icp->did_ic_dbp; - while (did_dbp) { - for (i = 0; i < did_dbp->db_blkcnt; i++) { - blkbusy(s, did_dbp->db_firstblk + i); - } - did_dbp = did_dbp->db_next; - } - } - - /* Only use data tags if not a MN set */ - if (!(lbp->lb_flags & MDDB_MNSET)) { - /* Found a bad tag, do NOT mark the data tag blks busy here */ - if (! (md_get_setstatus(s->s_setno) & MD_SET_BADTAG)) { - for (i = 0; i < s->s_lbp->lb_dtblkcnt; i++) - blkbusy(s, (s->s_lbp->lb_dtfirstblk + i)); - } - } - - /* directory block/entry sectors */ - for (dbp = s->s_dbp; dbp != 0; dbp = dbp->db_next) { - blkbusy(s, dbp->db_blknum); - for (dep = dbp->db_firstentry; dep != 0; dep = dep->de_next) - for (i = 0; i < dep->de_blkcount; i++) - blkbusy(s, dep->de_blks[i]); - } -} - -/* - * Add free space to the device id incore free list. - * Called: - * - During startup when all devid blocks are temporarily placed on the - * free list - * - After a devid has been deleted via the metadb command. - * - When mddb_devid_free_get adds unused space from a disk block - * to free list - */ -static int -mddb_devid_free_add( - mddb_set_t *s, - uint_t firstblk, - uint_t offset, - uint_t length -) -{ - mddb_did_free_t *did_freep; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (0); - } - - did_freep = (mddb_did_free_t *)kmem_zalloc(sizeof (mddb_did_free_t), - KM_SLEEP); - did_freep->free_blk = firstblk; - did_freep->free_offset = offset; - did_freep->free_length = length; - did_freep->free_next = s->s_did_icp->did_ic_freep; - s->s_did_icp->did_ic_freep = did_freep; - - return (0); -} - -/* - * Remove specific free space from the device id incore free list. - * Called at startup (after all devid blocks have been placed on - * free list) in order to remove the free space from the list that - * contains actual devids. - * Returns 0 if area successfully removed. - * Returns 1 if no matching area is found - so nothing removed. - */ -static int -mddb_devid_free_delete( - mddb_set_t *s, - uint_t firstblk, - uint_t offset, - uint_t length -) -{ - int block_found = 0; - mddb_did_free_t *did_freep1; /* next free block */ - mddb_did_free_t *did_freep2 = 0; /* previous free block */ - mddb_did_free_t *did_freep_before; /* area before offset, len */ - mddb_did_free_t *did_freep_after; /* area after offset, len */ - uint_t old_length; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (1); - } - - /* find free block for this devid */ - did_freep1 = s->s_did_icp->did_ic_freep; - while (did_freep1) { - /* - * Look through free list of to - * find our entry in the free list. Our entry should - * exist since the entire devid block was placed into - * this free list at startup. This code is just removing - * the non-free (in-use) portions of the devid block so - * that the remaining linked list does indeed just - * contain a free list. - * - * Our entry has been found if - * - the blocks match, - * - the offset (starting address) in the free list is - * less than the offset of our entry and - * - the length+offset (ending address) in the free list is - * greater than the length+offset of our entry. - */ - if ((did_freep1->free_blk == firstblk) && - (did_freep1->free_offset <= offset) && - ((did_freep1->free_length + did_freep1->free_offset) >= - (length + offset))) { - /* Have found our entry - remove from list */ - block_found = 1; - did_freep_before = did_freep1; - old_length = did_freep1->free_length; - /* did_freep1 - pts to next free block */ - did_freep1 = did_freep1->free_next; - if (did_freep2) { - did_freep2->free_next = did_freep1; - } else { - s->s_did_icp->did_ic_freep = did_freep1; - } - - /* - * did_freep_before points to area in block before - * offset, length. - */ - did_freep_before->free_length = offset - - did_freep_before->free_offset; - /* - * did_freep_after points to area in block after - * offset, length. - */ - did_freep_after = (mddb_did_free_t *)kmem_zalloc - (sizeof (mddb_did_free_t), KM_SLEEP); - did_freep_after->free_blk = did_freep_before->free_blk; - did_freep_after->free_offset = offset + length; - did_freep_after->free_length = old_length - length - - did_freep_before->free_length; - /* - * Add before and after areas to free list - * If area before or after offset, length has length - * of 0, that entry is not added. - */ - if (did_freep_after->free_length) { - did_freep_after->free_next = did_freep1; - if (did_freep2) { - did_freep2->free_next = - did_freep_after; - } else { - s->s_did_icp->did_ic_freep = - did_freep_after; - } - did_freep1 = did_freep_after; - } else { - kmem_free(did_freep_after, - sizeof (mddb_did_free_t)); - } - - if (did_freep_before->free_length) { - did_freep_before->free_next = did_freep1; - if (did_freep2) { - did_freep2->free_next = - did_freep_before; - } else { - s->s_did_icp->did_ic_freep = - did_freep_before; - } - } else { - kmem_free(did_freep_before, - sizeof (mddb_did_free_t)); - } - break; - } else { - did_freep2 = did_freep1; - did_freep1 = did_freep1->free_next; - } - } - if (block_found == 0) { - return (1); - } else { - return (0); - } -} - -/* - * Find free space of devid length and remove free space from list. - * Return a pointer to the previously free area. - * - * If there's not enough free space on the free list, get an empty - * disk block, put the empty disk block on the did_ic_dbp linked list, - * and add the disk block space not used for devid to the free list. - * - * Return pointer to address (inside disk block) of free area for devid. - * Return 0 if error. - */ -static caddr_t -mddb_devid_free_get( - mddb_set_t *s, - uint_t len, - uint_t *blk, - uint_t *cnt, - uint_t *offset -) -{ - mddb_did_free_t *freep, *freep2; - mddb_did_db_t *dbp; - uint_t blk_cnt, blk_num; - ddi_devid_t devid_ptr = NULL; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (0); - } - - freep = s->s_did_icp->did_ic_freep; - freep2 = (mddb_did_free_t *)NULL; - while (freep) { - /* found a free area - remove from free list */ - if (len <= freep->free_length) { - *blk = freep->free_blk; - *offset = freep->free_offset; - /* find disk block pointer that contains free area */ - dbp = s->s_did_icp->did_ic_dbp; - while (dbp) { - if (dbp->db_firstblk == *blk) - break; - else - dbp = dbp->db_next; - } - /* - * If a disk block pointer can't be found - something - * is wrong, so don't use this free space. - */ - if (dbp == NULL) { - freep2 = freep; - freep = freep->free_next; - continue; - } - - devid_ptr = (ddi_devid_t)(dbp->db_ptr + *offset); - *cnt = dbp->db_blkcnt; - - /* Update free list information */ - freep->free_offset += len; - freep->free_length -= len; - if (freep->free_length == 0) { - if (freep2) { - freep2->free_next = - freep->free_next; - } else { - s->s_did_icp->did_ic_freep = - freep->free_next; - } - kmem_free(freep, sizeof (mddb_did_free_t)); - } - break; - } - freep2 = freep; - freep = freep->free_next; - } - - /* Didn't find a free spot */ - if (freep == NULL) { - /* get free logical disk blk in replica */ - blk_cnt = btodb(len + (MDDB_BSIZE - 1)); - blk_num = getfreeblks(s, blk_cnt); - if (blk_num == 0) - return (0); - - /* Add disk block to disk block linked list */ - dbp = kmem_zalloc(sizeof (mddb_did_db_t), KM_SLEEP); - dbp->db_firstblk = blk_num; - dbp->db_blkcnt = blk_cnt; - dbp->db_ptr = (caddr_t)kmem_zalloc(dbtob(blk_cnt), KM_SLEEP); - dbp->db_next = s->s_did_icp->did_ic_dbp; - s->s_did_icp->did_ic_dbp = dbp; - devid_ptr = (ddi_devid_t)dbp->db_ptr; - - /* Update return values */ - *blk = blk_num; - *offset = 0; - *cnt = blk_cnt; - - /* Add unused part of block to free list */ - (void) mddb_devid_free_add(s, blk_num, - len, (dbtob(blk_cnt) - len)); - } - - return ((caddr_t)devid_ptr); -} - -/* - * Add device id information for locator index to device id area in set. - * Get free area to store device id from free list. Update checksum - * for mddb_did_blk. - * - * This routine does not write any data out to disk. - * After this routine has been called, the routine, writelocall, should - * be called to write both the locator block and device id area out - * to disk. - */ -static int -mddb_devid_add( - mddb_set_t *s, - uint_t index, - ddi_devid_t devid, - char *minor_name -) -{ - uint_t devid_len; - uint_t blk, offset; - ddi_devid_t devid_ptr; - mddb_did_info_t *did_info; - uint_t blkcnt, i; - mddb_did_blk_t *did_blk; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (1); - } - if (strlen(minor_name) > (MDDB_MINOR_NAME_MAX - 1)) - return (1); - - /* Check if device id has already been added */ - did_blk = s->s_did_icp->did_ic_blkp; - did_info = &(did_blk->blk_info[index]); - if (did_info->info_flags & MDDB_DID_EXISTS) - return (0); - - devid_len = ddi_devid_sizeof(devid); - devid_ptr = (ddi_devid_t)mddb_devid_free_get(s, - devid_len, &blk, &blkcnt, &offset); - - if (devid_ptr == NULL) { - return (1); - } - - /* Copy devid into devid free area */ - for (i = 0; i < devid_len; i++) - ((char *)devid_ptr)[i] = ((char *)devid)[i]; - - /* Update mddb_did_info area for new device id */ - did_info->info_flags = MDDB_DID_EXISTS | MDDB_DID_VALID; - - /* - * Only set UPDATED flag for non-replicated import cases. - * This allows the side locator driver name index to get - * updated in load_old_replicas. - */ - if (!(md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT)) - did_info->info_flags |= MDDB_DID_UPDATED; - - did_info->info_firstblk = blk; - did_info->info_blkcnt = blkcnt; - did_info->info_offset = offset; - did_info->info_length = devid_len; - (void) strcpy(did_info->info_minor_name, minor_name); - crcgen(devid_ptr, &did_info->info_checksum, devid_len, NULL); - - /* Add device id pointer to did_ic_devid array */ - s->s_did_icp->did_ic_devid[index] = devid_ptr; - - return (0); -} - - -/* - * Delete device id information for locator index from device id area in set. - * Add device id space to free area. - * - * This routine does not write any data out to disk. - * After this routine has been called, the routine, writelocall, should - * be called to write both the locator block and device id area out - * to disk. - */ -static int -mddb_devid_delete(mddb_set_t *s, uint_t index) -{ - mddb_did_info_t *did_info; - mddb_did_blk_t *did_blk; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (1); - } - - /* Get device id information from mddb_did_blk */ - did_blk = s->s_did_icp->did_ic_blkp; - did_info = &(did_blk->blk_info[index]); - - /* - * Ensure that the underlying device supports device ids - * before arbitrarily removing them. - */ - if (!(did_info->info_flags & MDDB_DID_EXISTS)) { - return (1); - } - - /* Remove device id information from mddb_did_blk */ - did_info->info_flags = 0; - - /* Remove device id from incore area */ - s->s_did_icp->did_ic_devid[index] = (ddi_devid_t)NULL; - - /* Add new free space in disk block to free list */ - (void) mddb_devid_free_add(s, did_info->info_firstblk, - did_info->info_offset, did_info->info_length); - - return (0); -} - -/* - * Check if there is a device id for a locator index. - * - * Caller of this routine should not free devid or minor_name since - * these will point to internal data structures that should not - * be freed. - */ -static int -mddb_devid_get( - mddb_set_t *s, - uint_t index, - ddi_devid_t *devid, - char **minor_name -) -{ - mddb_did_info_t *did_info; - - if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE)) { - return (0); - } - did_info = &(s->s_did_icp->did_ic_blkp->blk_info[index]); - - if (did_info->info_flags & MDDB_DID_EXISTS) { - *devid = s->s_did_icp->did_ic_devid[index]; - *minor_name = - s->s_did_icp->did_ic_blkp->blk_info[index].info_minor_name; - return (1); - } else - return (0); - - -} - -/* - * Check if device id is valid on current system. - * Needs devid, previously known dev_t and current minor_name. - * - * Success: - * Returns 0 if valid device id is found and updates - * dev_t if the dev_t associated with the device id is - * different than dev_t. - * Failure: - * Returns 1 if device id not valid on current system. - */ -static int -mddb_devid_validate(ddi_devid_t devid, md_dev64_t *dev, char *minor_name) -{ - int retndevs; - dev_t *ddi_devs; - int devid_flag = 0; - int cnt; - - if (dev == 0) - return (1); - /* - * See if devid is valid in the current system. - * If so, set dev to match the devid. - */ - if (ddi_lyr_devid_to_devlist(devid, minor_name, - &retndevs, &ddi_devs) == DDI_SUCCESS) { - if (retndevs > 0) { - /* devid is valid to use */ - devid_flag = 1; - /* does dev_t in list match dev */ - cnt = 0; - while (cnt < retndevs) { - if (*dev == md_expldev(ddi_devs[cnt])) - break; - cnt++; - } - /* - * If a different dev_t, then setup - * new dev and new major name - */ - if (cnt == retndevs) { - *dev = md_expldev(ddi_devs[0]); - } - ddi_lyr_free_devlist(ddi_devs, retndevs); - } - } - if (devid_flag) - return (0); - else - return (1); -} - - -/* - * Free the devid incore data areas - */ -static void -mddb_devid_icp_free(mddb_did_ic_t **did_icp, mddb_lb_t *lbp) -{ - mddb_did_free_t *did_freep1, *did_freep2; - mddb_did_db_t *did_dbp1, *did_dbp2; - mddb_did_ic_t *icp = *did_icp; - - if (icp) { - if (icp->did_ic_blkp) { - kmem_free((caddr_t)icp->did_ic_blkp, - dbtob(lbp->lb_didblkcnt)); - icp->did_ic_blkp = (mddb_did_blk_t *)NULL; - } - - if (icp->did_ic_dbp) { - did_dbp1 = icp->did_ic_dbp; - while (did_dbp1) { - did_dbp2 = did_dbp1->db_next; - kmem_free((caddr_t)did_dbp1->db_ptr, - dbtob(did_dbp1->db_blkcnt)); - kmem_free((caddr_t)did_dbp1, - sizeof (mddb_did_db_t)); - did_dbp1 = did_dbp2; - } - } - - if (icp->did_ic_freep) { - did_freep1 = icp->did_ic_freep; - while (did_freep1) { - did_freep2 = did_freep1->free_next; - kmem_free((caddr_t)did_freep1, - sizeof (mddb_did_free_t)); - did_freep1 = did_freep2; - } - } - - kmem_free((caddr_t)icp, sizeof (mddb_did_ic_t)); - *did_icp = (mddb_did_ic_t *)NULL; - } - -} - -static daddr_t -getphysblk( - mddb_block_t blk, - mddb_mb_ic_t *mbip -) -{ - mddb_mb_t *mbp = &(mbip->mbi_mddb_mb); - - while (blk >= mbp->mb_blkcnt) { - if (! mbip->mbi_next) - return ((daddr_t)-1); /* no such block */ - blk -= mbp->mb_blkcnt; - mbip = mbip->mbi_next; - mbp = &(mbip->mbi_mddb_mb); - } - - if (blk >= mbp->mb_blkmap.m_consecutive) - return ((daddr_t)-1); /* no such block */ - - return ((daddr_t)(mbp->mb_blkmap.m_firstblk + blk)); -} - -/* - * when a buf header is passed in the new buffer must be - * put on the front of the chain. writerec counts on it - */ -static int -putblks( - mddb_set_t *s, /* incore db set structure */ - caddr_t buffer, /* adr of buffer to be written */ - daddr_t blk, /* block number for first block */ - int cnt, /* number of blocks to be written */ - md_dev64_t device, /* device to be written to */ - mddb_bf_t **bufhead /* if non-zero then ASYNC I/O */ - /* and put buf address here */ -) -{ - buf_t *bp; - mddb_bf_t *bfp; - int err = 0; - - bfp = allocbuffer(s, MDDB_SLEEPOK); - bp = &bfp->bf_buf; - bp->b_bcount = MDDB_BSIZE * cnt; - bp->b_un.b_addr = buffer; - bp->b_blkno = blk; - bp->b_edev = md_dev64_to_dev(device); - /* - * if a header for a buf chain is passed in this is async io. - * currently only done for optimize records - */ - if (bufhead) { - bfp->bf_next = *bufhead; - *bufhead = bfp; - (void) mddb_rwdata(s, B_WRITE|B_ASYNC, bp); - return (0); - } - err = mddb_rwdata(s, B_WRITE, bp); - freebuffer(s, bfp); - if (err) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_REPLICA, - s->s_setno, device); - return (MDDB_F_EWRITE); - } - return (0); -} - -/* - * wrtblklst - takes an array of logical block numbers - * and writes the buffer to those blocks (scatter). - * If called during upgrade, this routine expects a - * non-translated (aka target) dev. - */ -static int -wrtblklst( - mddb_set_t *s, /* incore set structure */ - caddr_t buffer, /* buffer to be written (record blk) */ - mddb_block_t blka[], /* list of logical blks for record */ - daddr_t cnt, /* number of logical blks */ - const int li, /* locator index */ - mddb_bf_t **bufhead, /* if non-zero then ASYNC I/O */ - /* and put buf address here */ - int master_only /* allow only master node to write */ -) -{ - daddr_t blk; - daddr_t blk1; - int err = 0; - int cons; - mddb_lb_t *lbp = s->s_lbp; - mddb_locator_t *lp = &lbp->lb_locators[li]; - md_dev64_t dev; - mddb_mb_ic_t *mbip = s->s_mbiarray[li]; - - /* - * If a MN diskset and only the master can write, - * then a non-master node will just return success. - */ - if (lbp->lb_flags & MDDB_MNSET) { - if (master_only == MDDB_WR_ONLY_MASTER) { - /* return successfully if we aren't the master */ - if (!(md_set[s->s_setno].s_am_i_master)) { - return (0); - } - } - if (mbip == NULL) - return (MDDB_F_EWRITE); - } - - dev = md_xlate_targ_2_mini(md_expldev(lp->l_dev)); - if (dev == NODEV64) { - return (1); - } - - blk = getphysblk(blka[0], mbip); - ASSERT(blk >= 0); - - cons = 1; - while (cnt) { - if (cons != cnt) { - blk1 = getphysblk(blka[cons], mbip); - ASSERT(blk1 >= 0); - if ((blk + cons) == blk1) { - cons++; - continue; - } - } - if (err = putblks(s, buffer, blk, cons, dev, bufhead)) { - /* - * If an MN diskset and any_node_can_write - * then this request is coming from writeoptrecord - * and l_flags field should not be updated. - * l_flags will be updated as a result of sending - * a class1 message to the master. Setting l_flags - * here will cause slave to be out of sync with - * master. - * - * Otherwise, set the error in l_flags - * (this occurs if this is not a MN diskset or - * only_master_can_write is set). - */ - if ((!(lbp->lb_flags & MDDB_MNSET)) || - (master_only == MDDB_WR_ONLY_MASTER)) { - lp->l_flags |= MDDB_F_EWRITE; - } - return (err); - } - if (bufhead) - (*bufhead)->bf_locator = lp; - - buffer += MDDB_BSIZE * cons; - cnt -= cons; - blka += cons; - if (cnt) { - blk = getphysblk(blka[0], mbip); - ASSERT(blk >= 0); - } - cons = 1; - } - - return (0); -} - -/* - * writeblks - takes a logical block number/block count pair - * and writes the buffer to those contiguous logical blocks. - * If called during upgrade, this routine expects a non-translated - * (aka target) dev. - */ -static int -writeblks( - mddb_set_t *s, /* incore set structure */ - caddr_t buffer, /* buffer to be written */ - mddb_block_t blk, /* starting logical block number */ - int cnt, /* number of log blocks to be written */ - const int li, /* locator index */ - int master_only /* allow only master node to write */ -) -{ - daddr_t physblk; - int err = 0; - int i; - mddb_lb_t *lbp = s->s_lbp; - mddb_locator_t *lp = &lbp->lb_locators[li]; - md_dev64_t dev; - mddb_block_t *blkarray; - int size; - int ret; - - /* - * If a MN diskset and only the master can write, - * then a non-master node will just return success. - */ - if ((lbp->lb_flags & MDDB_MNSET) && - (master_only == MDDB_WR_ONLY_MASTER)) { - /* return successfully if we aren't the master */ - if (!(md_set[s->s_setno].s_am_i_master)) { - return (0); - } - } - - dev = md_xlate_targ_2_mini(md_expldev(lp->l_dev)); - if (dev == NODEV64) { - return (1); - } - - if (cnt > 1) { - size = sizeof (mddb_block_t) * cnt; - blkarray = (mddb_block_t *)kmem_alloc(size, KM_SLEEP); - for (i = 0; i < cnt; i++) - blkarray[i] = blk + i; - ret = wrtblklst(s, buffer, blkarray, cnt, - li, 0, MDDB_WR_ONLY_MASTER); - kmem_free(blkarray, size); - return (ret); - } - physblk = getphysblk(blk, s->s_mbiarray[li]); - ASSERT(physblk > 0); - if (err = putblks(s, buffer, physblk, 1, dev, (mddb_bf_t **)0)) { - lp->l_flags |= MDDB_F_EWRITE; - return (err); - } - return (0); -} - -/* - * writeall - will write the buffer to all ACTIVE/NON-ERRORED replicas. - */ -static int -writeall( - mddb_set_t *s, /* incore set structure */ - caddr_t buffer, /* buffer to be written */ - mddb_block_t block, /* starting logical block number */ - int cnt, /* number of log blocks to be written */ - int master_only /* allow only master node to write */ -) -{ - int li; - int err = 0; - mddb_lb_t *lbp = s->s_lbp; - - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - err |= writeblks(s, buffer, block, cnt, li, master_only); - } - - return (err); -} - -/* - * writelocall - write the locator block and device id information (if - * replica is in device id format) to all ACTIVE/NON-ERRORER replicas. - * - * Increments the locator block's commitcnt. Updates the device id area's - * commitcnt if the replica is in device id format. Regenerates the - * checksums after updating the commitcnt(s). - */ -static int -writelocall( - mddb_set_t *s /* incore set structure */ -) -{ - int li; - int err = 0; - mddb_lb_t *lbp = s->s_lbp; - mddb_did_blk_t *did_blk; - mddb_did_db_t *did_dbp; - - s->s_lbp->lb_commitcnt++; - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - did_blk = s->s_did_icp->did_ic_blkp; - did_blk->blk_commitcnt = s->s_lbp->lb_commitcnt; - crcgen(did_blk, &did_blk->blk_checksum, - dbtob(lbp->lb_didblkcnt), NULL); - } - crcgen(lbp, &lbp->lb_checksum, dbtob(lbp->lb_blkcnt), NULL); - - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - /* write out blocks containing actual device ids */ - did_dbp = s->s_did_icp->did_ic_dbp; - while (did_dbp) { - err |= writeblks(s, (caddr_t)did_dbp->db_ptr, - did_dbp->db_firstblk, - did_dbp->db_blkcnt, li, - MDDB_WR_ONLY_MASTER); - did_dbp = did_dbp->db_next; - } - - /* write out device id area block */ - err |= writeblks(s, (caddr_t)did_blk, - lbp->lb_didfirstblk, lbp->lb_didblkcnt, li, - MDDB_WR_ONLY_MASTER); - } - /* write out locator block */ - err |= writeblks(s, (caddr_t)lbp, 0, lbp->lb_blkcnt, li, - MDDB_WR_ONLY_MASTER); - } - - /* - * If a MN diskset and this is the master, set the PARSE_LOCBLK flag - * in the mddb_set structure to show that the locator block has - * been changed. - */ - - if ((lbp->lb_flags & MDDB_MNSET) && - (md_set[s->s_setno].s_am_i_master)) { - s->s_mn_parseflags |= MDDB_PARSE_LOCBLK; - } - return (err); -} - -/* - * If called during upgrade, this routine expects a translated - * (aka miniroot) dev. - */ -static int -getblks( - mddb_set_t *s, /* incore db set structure */ - caddr_t buffer, /* buffer to read data into */ - md_dev64_t device, /* device to read from */ - daddr_t blk, /* physical block number to read */ - int cnt, /* number of blocks to read */ - int flag /* flags for I/O */ -) -{ - buf_t *bp; - mddb_bf_t *bfp; - int err = 0; - - bfp = allocbuffer(s, MDDB_SLEEPOK); /* this will never sleep */ - bp = &bfp->bf_buf; - bp->b_bcount = MDDB_BSIZE * cnt; - bp->b_un.b_addr = buffer; - bp->b_blkno = blk; - bp->b_edev = md_dev64_to_dev(device); - err = mddb_rwdata(s, (B_READ | flag), bp); - freebuffer(s, bfp); - if (err) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_REPLICA, - s->s_setno, device); - return (MDDB_F_EREAD); - } - return (0); -} - -/* - * readblklst - takes an array of logical block numbers - * and reads those blocks (gather) into the buffer. - * If called during upgrade, this routine expects a non-translated - * (aka target) dev. - */ -static int -readblklst( - mddb_set_t *s, /* incore set structure */ - caddr_t buffer, /* buffer to be read (record block) */ - mddb_block_t blka[], /* list of logical blocks to be read */ - daddr_t cnt, /* number of logical blocks */ - int li, /* locator index */ - int flag /* flags for I/O */ -) -{ - daddr_t blk; - daddr_t blk1; - int err = 0; - int cons; - md_dev64_t dev; - mddb_mb_ic_t *mbip; - - mbip = s->s_mbiarray[li]; - dev = md_expldev(s->s_lbp->lb_locators[li].l_dev); - dev = md_xlate_targ_2_mini(dev); - if (dev == NODEV64) { - return (1); - } - - blk = getphysblk(blka[0], mbip); - ASSERT(blk >= 0); - - cons = 1; - while (cnt) { - if (cons != cnt) { - blk1 = getphysblk(blka[cons], mbip); - ASSERT(blk1 >= 0); - if ((blk + cons) == blk1) { - cons++; - continue; - } - } - if (err = getblks(s, buffer, dev, blk, cons, flag)) - return (err); - buffer += MDDB_BSIZE * cons; - cnt -= cons; - blka += cons; - if (cnt) { - blk = getphysblk(blka[0], mbip); - ASSERT(blk >= 0); - } - cons = 1; - } - return (0); -} - -/* - * readblks - takes a logical block number/block count pair - * and reads those contiguous logical blocks into the buffer. - * If called during upgrade, this routine expects a non-translated - * (aka target) dev. - */ -static int -readblks( - mddb_set_t *s, /* incore set structure */ - caddr_t buffer, /* buffer to be read into */ - mddb_block_t blk, /* logical block number to be read */ - int cnt, /* number of logical blocks to be read */ - int li /* locator index */ -) -{ - daddr_t physblk; - md_dev64_t device; - int i; - mddb_block_t *blkarray; - int size; - int ret; - - if (cnt > 1) { - size = sizeof (mddb_block_t) * cnt; - blkarray = (mddb_block_t *)kmem_alloc(size, KM_SLEEP); - for (i = 0; i < cnt; i++) - blkarray[i] = blk + i; - ret = readblklst(s, buffer, blkarray, cnt, li, 0); - kmem_free(blkarray, size); - return (ret); - } - physblk = getphysblk(blk, s->s_mbiarray[li]); - ASSERT(physblk > 0); - device = md_expldev(s->s_lbp->lb_locators[li].l_dev); - device = md_xlate_targ_2_mini(device); - if (device == NODEV64) { - return (1); - } - return (getblks(s, buffer, device, physblk, 1, 0)); -} - -static void -single_thread_start( - mddb_set_t *s -) -{ - while (s->s_singlelockgotten) { - s->s_singlelockwanted++; - cv_wait(&s->s_single_thread_cv, SETMUTEX(s->s_setno)); - } - s->s_singlelockgotten++; -} - -static void -single_thread_end( - mddb_set_t *s -) -{ - ASSERT(s->s_singlelockgotten); - s->s_singlelockgotten = 0; - if (s->s_singlelockwanted) { - s->s_singlelockwanted = 0; - cv_broadcast(&s->s_single_thread_cv); - } -} - -static size_t -sizeofde( - mddb_de_ic_t *dep -) -{ - size_t size; - - size = sizeof (mddb_de_ic_t) - sizeof (mddb_block_t) + - sizeof (mddb_block_t) * dep->de_blkcount; - return (size); -} - -static size_t -sizeofde32( - mddb_de32_t *dep -) -{ - size_t size; - - size = sizeof (*dep) - sizeof (dep->de32_blks) + - sizeof (mddb_block_t) * dep->de32_blkcount; - return (size); -} - -static mddb_de32_t * -nextentry( - mddb_de32_t *dep -) -{ - mddb_de32_t *ret; - - ret = (mddb_de32_t *)((void *)((caddr_t)dep + sizeofde32(dep))); - return (ret); -} - -static void -create_db32rec( - mddb_db32_t *db32p, - mddb_db_t *dbp -) -{ - mddb_de_ic_t *dep; - mddb_de32_t *de32p; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_de_t) == sizeof (mddb_de32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); -#endif - - dbtodb32(dbp, db32p); - if ((dbp->db_firstentry != NULL) && (db32p->db32_firstentry == 0)) - db32p->db32_firstentry = 0x4; - de32p = (mddb_de32_t *)((void *) ((caddr_t)(&db32p->db32_firstentry) - + sizeof (db32p->db32_firstentry))); - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - detode32(dep, de32p); - if ((dep->de_next != NULL) && (de32p->de32_next == 0)) - de32p->de32_next = 0x4; - de32p = nextentry(de32p); - } - ASSERT((uintptr_t)de32p <= (uintptr_t)de32p + MDDB_BSIZE); -} - -/* - * If called during upgrade, this routine expects a translated - * (aka miniroot) dev. - * If master blocks are found, set the mn_set parameter to 1 if the - * the master block revision number is MDDB_REV_MNMB; otherwise, - * set it to 0. - * If master blocks are not found, do not change the mnset parameter. - */ -static mddb_mb_ic_t * -getmasters( - mddb_set_t *s, - md_dev64_t dev, - daddr_t blkno, - uint_t *flag, - int *mn_set -) -{ - mddb_mb_ic_t *mbi = NULL; - mddb_mb_t *mb; - int error = 0; - ddi_devid_t devid; - - - if (mddb_devopen(dev)) { - if (flag) - *flag |= MDDB_F_EMASTER; - return ((mddb_mb_ic_t *)NULL); - } - - - mbi = (mddb_mb_ic_t *)kmem_zalloc(MDDB_IC_BSIZE, KM_SLEEP); - mb = &(mbi->mbi_mddb_mb); - if (error = getblks(s, (caddr_t)mb, dev, blkno, - btodb(MDDB_BSIZE), 0)) { - error |= MDDB_F_EMASTER; - } - if (mb->mb_magic != MDDB_MAGIC_MB) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - /* Check for MDDB_REV_MNMB and lower */ - if (revchk(MDDB_REV_MNMB, mb->mb_revision)) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - if (crcchk(mb, &mb->mb_checksum, MDDB_BSIZE, NULL)) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - - if (!(md_get_setstatus(s->s_setno) & - (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) && - (mb->mb_setno != s->s_setno)) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - if (mb->mb_blkno != blkno) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - mb->mb_next = NULL; - mbi->mbi_next = NULL; - - if (error) - goto out; - - /* - * Check the md_devid_destroy and md_keep_repl_state flags - * to see if we need to regen the devid or not. - * - * Don't care about devid in local set since it is not used - * and this should not be part of set importing - */ - if ((s->s_setno != MD_LOCAL_SET) && - !(md_get_setstatus(s->s_setno) & - (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT))) { - /* - * Now check the destroy flag. We also need to handle - * the case where the destroy flag is reset after the - * destroy - */ - if (md_devid_destroy || (mb->mb_devid_len == 0)) { - - if (md_devid_destroy) { - bzero(mb->mb_devid, mb->mb_devid_len); - mb->mb_devid_len = 0; - } - - /* - * Try to regenerate it if the 'keep' flag is not set - */ - if (!md_keep_repl_state) { - if (ddi_lyr_get_devid(md_dev64_to_dev(dev), - &devid) == DDI_SUCCESS) { - mb->mb_devid_len = - ddi_devid_sizeof(devid); - bcopy(devid, mb->mb_devid, - mb->mb_devid_len); - ddi_devid_free(devid); - } else { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - } - - crcgen(mb, &mb->mb_checksum, MDDB_BSIZE, NULL); - - /* - * Push - */ - if (putblks(s, (caddr_t)mb, blkno, 1, dev, 0) != 0) { - error = MDDB_F_EFMT | MDDB_F_EMASTER; - } - } - } - - if (! error) { - /* Set mn_set parameter to 1 if a MN set */ - if (mb->mb_revision == MDDB_REV_MNMB) - *mn_set = 1; - else - *mn_set = 0; - return (mbi); - } - -out: - /* Error Out */ - if (flag) - *flag |= error; - - kmem_free((caddr_t)mbi, MDDB_IC_BSIZE); - mddb_devclose(dev); - return ((mddb_mb_ic_t *)NULL); -} - -static int -getrecord( - mddb_set_t *s, - mddb_de_ic_t *dep, - int li -) -{ - int err = 0; - mddb_rb32_t *rbp; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - - dep->de_rb = (mddb_rb32_t *)kmem_zalloc(dep->de_recsize, KM_SLEEP); - rbp = dep->de_rb; - - err = readblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, 0); - if (err) { - return (MDDB_F_EDATA | err); - } - if (rbp->rb_magic != MDDB_MAGIC_RB) { - return (MDDB_F_EFMT | MDDB_F_EDATA); - } - if ((revchk(MDDB_REV_RB, rbp->rb_revision) != 0) && - (revchk(MDDB_REV_RB64, rbp->rb_revision) != 0) && - (revchk(MDDB_REV_RBFN, rbp->rb_revision) != 0) && - (revchk(MDDB_REV_RB64FN, rbp->rb_revision) != 0)) { - return (MDDB_F_EFMT | MDDB_F_EDATA); - } - /* Check crc for this record */ - if (rec_crcchk(s, dep, rbp)) { - return (MDDB_F_EFMT | MDDB_F_EDATA); - } - return (0); -} - -/* - * Code to read in the locator name information - */ -static int -readlocnames( - mddb_set_t *s, - int li -) -{ - mddb_ln_t *lnp; - int err = 0; - mddb_block_t ln_blkcnt, ln_blkno; - - /* - * read in the locator name blocks - */ - s->s_lnp = NULL; - - ln_blkno = s->s_lbp->lb_lnfirstblk; - ln_blkcnt = s->s_lbp->lb_lnblkcnt; - lnp = (mddb_ln_t *)kmem_zalloc(dbtob(ln_blkcnt), KM_SLEEP); - - err = readblks(s, (caddr_t)lnp, ln_blkno, ln_blkcnt, li); - if (err) { - err |= MDDB_F_EDATA; - goto out; - } - if (lnp->ln_magic != MDDB_MAGIC_LN) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - goto out; - } - if (s->s_lbp->lb_flags & MDDB_MNSET) { - if (revchk(MDDB_REV_MNLN, lnp->ln_revision)) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - goto out; - } - } else { - if (revchk(MDDB_REV_LN, lnp->ln_revision)) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - goto out; - } - } - if (crcchk(lnp, &lnp->ln_checksum, dbtob(ln_blkcnt), NULL)) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - goto out; - } -out: - /* - * if error occurred in locator name blocks free them - * and return - */ - if (err) { - kmem_free((caddr_t)lnp, dbtob(ln_blkcnt)); - return (err); - } - s->s_lnp = lnp; - return (0); -} - -/* - * code to read in a copy of the database. - */ - -static int -readcopy( - mddb_set_t *s, - int li -) -{ - uint_t blk; - mddb_db_t *dbp, *dbp1, *dbhp; - mddb_db32_t *db32p; - mddb_de_ic_t *dep, *dep2; - mddb_de32_t *de32p, *de32p2; - int err = 0; - uint_t checksum; - - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_de_t) == sizeof (mddb_de32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); -#endif - - dbp = NULL; - dbhp = NULL; - /* - * read in all the directory blocks - */ - blk = s->s_lbp->lb_dbfirstblk; - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, KM_SLEEP); - - for (; blk != 0; blk = dbp->db_nextblk) { - dbp1 = (mddb_db_t *)kmem_zalloc(sizeof (mddb_db_t), KM_SLEEP); - if (! dbhp) { - dbhp = dbp1; - } else { - dbp->db_next = dbp1; - } - dbp = dbp1; - - err = readblks(s, (caddr_t)db32p, blk, 1, li); - if (err) { - err |= MDDB_F_EDATA; - break; - } - db32todb(db32p, dbp); - if (db32p->db32_magic != MDDB_MAGIC_DB) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - break; - } - if (revchk(MDDB_REV_DB, db32p->db32_revision)) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - break; - } - if (crcchk(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL)) { - err = MDDB_F_EDATA | MDDB_F_EFMT; - break; - } - /* - * first go through and fix up all de_next pointers - */ - if (dbp->db_firstentry) { - - de32p = (mddb_de32_t *) - ((void *) ((caddr_t)(&db32p->db32_firstentry) - + sizeof (db32p->db32_firstentry))); - - dep = (mddb_de_ic_t *) - kmem_zalloc(sizeof (mddb_de_ic_t) - - sizeof (mddb_block_t) + - sizeof (mddb_block_t) * de32p->de32_blkcount, - KM_SLEEP); - de32tode(de32p, dep); - - dbp->db_firstentry = dep; - while (de32p && de32p->de32_next) { - - de32p2 = nextentry(de32p); - - dep2 = (mddb_de_ic_t *)kmem_zalloc( - sizeof (mddb_de_ic_t) - - sizeof (mddb_block_t) + - sizeof (mddb_block_t) * - de32p2->de32_blkcount, KM_SLEEP); - - de32tode(de32p2, dep2); - - dep->de_next = dep2; - dep = dep2; - de32p = de32p2; - } - } - /* - * go through and make all of the pointer to record blocks - * are null; - */ - for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) - dep->de_rb = NULL; - } - kmem_free((caddr_t)db32p, MDDB_BSIZE); - dbp->db_next = NULL; - /* - * if error occurred in directory blocks free them - * and return - */ - if (err) { - dbp = dbhp; - while (dbp) { - dep = dbp->db_firstentry; - while (dep) { - /* No mddb_rb32_t structures yet */ - dep2 = dep->de_next; - kmem_free((caddr_t)dep, sizeofde(dep)); - dep = dep2; - } - dbp1 = dbp->db_next; - kmem_free((caddr_t)dbp, sizeof (mddb_db_t)); - dbp = dbp1; - } - s->s_dbp = NULL; - return (err); - - } - /* - */ - err = 0; - checksum = MDDB_GLOBAL_XOR; - for (dbp = dbhp; dbp != NULL; dbp = dbp->db_next) { - checksum ^= dbp->db_recsum; - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if (dep->de_flags & MDDB_F_OPT) - continue; - err = getrecord(s, dep, li); - if (err) - break; - /* Don't include CHANGELOG in big XOR */ - if (dep->de_flags & MDDB_F_CHANGELOG) - continue; - checksum ^= dep->de_rb->rb_checksum; - checksum ^= dep->de_rb->rb_checksum_fiddle; - } - if (err) - break; - } - if (checksum) { - if (! err) - err = MDDB_F_EDATA | MDDB_F_EFMT; - } - if (err) { - dbp = dbhp; - dbhp = NULL; - while (dbp) { - dep = dbp->db_firstentry; - while (dep) { - if (dep->de_rb) - kmem_free((caddr_t)dep->de_rb, - dep->de_recsize); - dep2 = dep->de_next; - kmem_free((caddr_t)dep, sizeofde(dep)); - dep = dep2; - } - dbp1 = dbp->db_next; - kmem_free((caddr_t)dbp, sizeof (mddb_db_t)); - dbp = dbp1; - } - } - s->s_dbp = dbhp; - return (err); -} - -static int -getoptcnt( - mddb_set_t *s, - int li) -{ - int result; - mddb_de_ic_t *dep; - mddb_db_t *dbp; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_de_t) == sizeof (mddb_de32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); -#endif - - result = 0; - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - dep = dbp->db_firstentry; - for (; dep != NULL; dep = dep->de_next) { - if (! (dep->de_flags & MDDB_F_OPT)) - continue; - if (((dep->de_optinfo[0].o_flags & MDDB_F_ACTIVE) && - (li == dep->de_optinfo[0].o_li)) || - ((dep->de_optinfo[1].o_flags & MDDB_F_ACTIVE) && - (li == dep->de_optinfo[1].o_li))) - result++; - } - } - return (result); -} - -static void -getoptdev( - mddb_set_t *s, - mddb_de_ic_t *rdep, - int opti -) -{ - mddb_lb_t *lbp; - mddb_locator_t *lp; - mddb_optinfo_t *otherop; - mddb_optinfo_t *resultop; - int li; - dev_t otherdev; - int blkonly = 0; - int mincnt; - int thiscnt; - - lbp = s->s_lbp; - - resultop = &rdep->de_optinfo[opti]; - otherop = &rdep->de_optinfo[1-opti]; - - resultop->o_flags = 0; - - /* - * scan through and see if data bases have to vary by only device - */ - - if (otherop->o_flags & MDDB_F_ACTIVE) { - blkonly = 1; - otherdev = expldev(lbp->lb_locators[otherop->o_li].l_dev); - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (expldev(lp->l_dev) != otherdev) { - blkonly = 0; - break; - } - } - } - - mincnt = 999999; - for (li = 0; li < lbp->lb_loccnt; li++) { - dev_info_t *devi; - int removable = 0; - - lp = &lbp->lb_locators[li]; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (otherop->o_flags & MDDB_F_ACTIVE) { - if (blkonly) { - if (otherop->o_li == li) - continue; - } else { - if (otherdev == expldev(lp->l_dev)) - continue; - } - } - - /* - * Check if this is a removable device. If it is we - * assume it is something like a USB flash disk, a zip disk - * or even a floppy that is being used to help maintain - * mddb quorum. We don't want to put any optimized resync - * records on these kinds of disks since they are usually - * slower or don't have the same read/write lifetimes as - * a regular fixed disk. - */ - if ((devi = e_ddi_hold_devi_by_dev(lp->l_dev, 0)) != NULL) { - int error; - struct cb_ops *cb; - ddi_prop_op_t prop_op = PROP_LEN_AND_VAL_BUF; - int propvalue = 0; - int proplength = sizeof (int); - - if ((cb = devopsp[getmajor(lp->l_dev)]->devo_cb_ops) - != NULL) { - error = (*cb->cb_prop_op)(DDI_DEV_T_ANY, devi, - prop_op, DDI_PROP_NOTPROM | - DDI_PROP_DONTPASS, "removable-media", - (caddr_t)&propvalue, &proplength); - - if (error == DDI_PROP_SUCCESS) - removable = 1; - } - - ddi_release_devi(devi); - } - - if (removable) - continue; - - thiscnt = getoptcnt(s, li); - if (thiscnt < mincnt) { - resultop->o_li = li; - mincnt = thiscnt; - resultop->o_flags = MDDB_F_ACTIVE; - } - } -} - -static void -allocuserdata( - mddb_de_ic_t *dep -) -{ - mddb_rb32_t *rbp; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - rbp = dep->de_rb; - rbp->rb_private = 0; - dep->de_rb_userdata = kmem_zalloc(dep->de_reqsize, KM_SLEEP); - rbp->rb_userdata = 0x4; /* Make sure this is non-zero */ - bcopy((caddr_t)rbp->rb_data, dep->de_rb_userdata, dep->de_reqsize); -} - - -static void -getuserdata( - set_t setno, - mddb_de_ic_t *dep -) -{ - mddb_rb32_t *rbp; - - - mddb_type_t type = dep->de_type1; - caddr_t data, udata; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - rbp = dep->de_rb; - data = (caddr_t)rbp->rb_data; - udata = (caddr_t)dep->de_rb_userdata; - - /* - * If it's a driver record, and an old style record, and not a DRL - * record, we must convert it because it was incore as a 64 bit - * structure but its on disk layout has only 32 bit for block sizes - */ - if (!(md_get_setstatus(setno) & - (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) && - (type >= MDDB_FIRST_MODID) && - ((rbp->rb_revision == MDDB_REV_RB) || - (rbp->rb_revision == MDDB_REV_RBFN))) { - - switch (dep->de_flags) { - - case MDDB_F_STRIPE: - stripe_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_MIRROR: - mirror_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_RAID: - raid_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_SOFTPART: - softpart_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_TRANS_MASTER: - trans_master_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_TRANS_LOG: - trans_log_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_HOTSPARE: - hs_convert(data, udata, BIG_2_SMALL); - break; - - case MDDB_F_OPT: - default: - bcopy(udata, data, dep->de_reqsize); - } - } else { - bcopy(udata, data, dep->de_reqsize); - } -} - -static void -getoptrecord( - mddb_set_t *s, - mddb_de_ic_t *dep -) -{ - mddb_lb_t *lbp; - mddb_locator_t *lp; - mddb_rb32_t *rbp, *crbp; - int li; - int i; - int err = 0; - size_t recsize; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - lbp = s->s_lbp; - - recsize = dep->de_recsize; - dep->de_rb = (mddb_rb32_t *)kmem_zalloc(recsize, KM_SLEEP); - rbp = dep->de_rb; - crbp = (mddb_rb32_t *)kmem_zalloc(recsize, KM_SLEEP); - - dep->de_optinfo[0].o_flags |= MDDB_F_EDATA; - dep->de_optinfo[1].o_flags |= MDDB_F_EDATA; - - for (i = 0; i < 2; i++) { - if (! (dep->de_optinfo[i].o_flags & MDDB_F_ACTIVE)) - continue; - li = dep->de_optinfo[i].o_li; - lp = &lbp->lb_locators[li]; - - if (! (lp->l_flags & MDDB_F_ACTIVE) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - err = readblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, 0); - - if (err) - continue; - - if (rbp->rb_magic != MDDB_MAGIC_RB) - continue; - - if (revchk(MDDB_REV_RB, rbp->rb_revision)) - continue; - - /* Check the crc for this record */ - if (rec_crcchk(s, dep, rbp)) { - continue; - } - - dep->de_optinfo[i].o_flags = MDDB_F_ACTIVE; - - if (rbp == crbp) { - if (rbp->rb_checksum != crbp->rb_checksum) - dep->de_optinfo[1].o_flags |= MDDB_F_EDATA; - break; - } - rbp = crbp; - } - - if (rbp == crbp) { - rbp->rb_private = 0; - kmem_free((caddr_t)crbp, recsize); - return; - } - bzero((caddr_t)rbp, recsize); - rbp->rb_magic = MDDB_MAGIC_RB; - rbp->rb_revision = MDDB_REV_RB; - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - kmem_free((caddr_t)crbp, recsize); -} - -/* - * writeoptrecord writes out an optimized record. - */ -static int -writeoptrecord( - mddb_set_t *s, - mddb_de_ic_t *dep -) -{ - mddb_rb32_t *rbp; - int li; - int err = 0, wrt_err = 0; - mddb_bf_t *bufhead, *bfp; - mddb_lb_t *lbp = s->s_lbp; - mddb_locator_t *lp; - int i; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - bufhead = NULL; - err = 0; - - while (s->s_opthavequeuinglck) { - s->s_optwantqueuinglck++; - cv_wait(&s->s_optqueuing_cv, SETMUTEX(s->s_setno)); - } - s->s_opthavequeuinglck++; - rbp = dep->de_rb; - for (i = 0; i < 2; i++) { - /* - * only possible error is xlate. This can - * occur if a replica was off line and came - * back. During the mean time the database grew - * large than the now on line replica can store - */ - if (! (dep->de_optinfo[i].o_flags & MDDB_F_ACTIVE)) - continue; - li = dep->de_optinfo[i].o_li; - /* - * In a MN diskset, any node can write optimized record(s). - */ - wrt_err = wrtblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, &bufhead, MDDB_WR_ANY_NODE); - /* - * For MN diskset, set error in optinfo structure so - * that mddb_commitrec knows which replica failed. - */ - if ((MD_MNSET_SETNO(s->s_setno)) && - (wrt_err & MDDB_F_EWRITE)) { - dep->de_optinfo[i].o_flags |= MDDB_F_EWRITE; - } - err |= wrt_err; - } - s->s_opthavequeuinglck = 0; - if (s->s_optwantqueuinglck) { - s->s_optwantqueuinglck = 0; - cv_broadcast(&s->s_optqueuing_cv); - } - for (bfp = bufhead; bfp; bfp = bufhead) { - mutex_exit(SETMUTEX(s->s_setno)); - (void) biowait(&bfp->bf_buf); - mutex_enter(SETMUTEX(s->s_setno)); - if (bfp->bf_buf.b_flags & B_ERROR) { - /* - * If an MN diskset, don't set replica - * in error since this hasn't been set in master. - * Setting replica in error before master could - * leave the nodes with different views of the - * world since a class 1 configuration change - * could occur in mddb_commitrec as soon as - * all locks are dropped. Must keep this - * node the same as master and can't afford a - * failure from the class 1 config change - * if master succeeded. - */ - if (!(MD_MNSET_SETNO(s->s_setno))) { - bfp->bf_locator->l_flags |= MDDB_F_EWRITE; - } else { - /* - * Find which de_optinfo (which replica) - * had a failure and set the failure in - * the o_flags field. - */ - lp = &lbp->lb_locators[dep->de_optinfo[0].o_li]; - if (lp == bfp->bf_locator) { - dep->de_optinfo[0].o_flags |= - MDDB_F_EWRITE; - } else { - dep->de_optinfo[1].o_flags |= - MDDB_F_EWRITE; - } - } - err |= MDDB_F_EWRITE; - } - bufhead = bfp->bf_next; - freebuffer(s, bfp); - } - return (err); -} - -/* - * Fix up the optimized resync record. Used in the traditional and local - * disksets to move an optimized record from a failed or deleted mddb - * to an active one. - * - * In a MN diskset, the fixing of the optimized record is split between - * the master and slave nodes. If the master node moves the optimized - * resync record, then the master node will send a MDDB_PARSE_OPTRECS - * message to the slave nodes causing the slave nodes to reget the - * directory entry containing the location of the optimized resync record. - * After the record is reread from disk, then writeoptrecord is called - * if the location of the optimized resync record or flags have changed. - * When writeoptrecord is called, the node that is the owner of this record - * will write the optimized record to the location specified in the directory - * entry. Since the master node uses the highest class message (PARSE) - * the record owner node is guaranteed to already have an updated - * directory entry incore. - * - * The other difference between the traditional/local set and MN diskset - * is that the directory entry can be written to disk before the optimized - * record in a MN diskset if the record is owned by a slave node. So, - * the users of an optimized record must handle the failure case when no - * data is available from an optimized record since the master node could - * have failed during the relocation of the optimized record to another mddb. - */ -static int -fixoptrecord( - mddb_set_t *s, - mddb_de_ic_t *dep, - mddb_db_t *dbp -) -{ - int changed; - int writedata; - int err = 0; - int i; - mddb_lb_t *lbp; - mddb_optinfo_t *op; - mddb_db32_t *db32p; - int rec_owner; /* Is node owner of record? */ - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); -#endif - - lbp = s->s_lbp; - changed = 0; - writedata = 0; - for (i = 0; i < 2; i++) { - op = &dep->de_optinfo[i]; - - if (! (lbp->lb_locators[op->o_li].l_flags & MDDB_F_ACTIVE)) - op->o_flags = 0; - - /* - * If optimized record has seen a replica failure, - * assign new replica to record and re-write data - * to new record. - */ - if (! (op->o_flags & MDDB_F_ACTIVE)) { - getoptdev(s, dep, i); - writedata++; - changed++; - /* Set flag for slaves to reread dep and write rec */ - if (lbp->lb_flags & MDDB_MNSET) { - s->s_mn_parseflags |= MDDB_PARSE_OPTRECS; - } - } - - /* - * If just an error in the data was seen, set - * the optimized record's replica flag to active (ok) - * and try again. - */ - if (op->o_flags & MDDB_F_EDATA) { - dep->de_optinfo[0].o_flags = MDDB_F_ACTIVE; - writedata++; - } - } - - rec_owner = 0; - if (lbp->lb_flags & MDDB_MNSET) { - /* - * If a MN diskset then check the owner of optimized record. - * If the master node owns the record or if there is - * no owner of the record, then the master can write the - * optimized record to disk. - * Master node can write the optimized record now, but - * slave nodes write their records during handling of - * the MDDB_PARSE_OPTRECS message. - */ - if ((dep->de_owner_nodeid == MD_MN_INVALID_NID) || - (dep->de_owner_nodeid == md_set[s->s_setno].s_nodeid)) { - rec_owner = 1; - } - } else { - /* - * In traditional diskset and local set, this node - * is always the record owner and always the master. - */ - rec_owner = 1; - } - - /* - * If this node is the record owner, write out record. - */ - if ((writedata) && (rec_owner)) { - if (err = writeoptrecord(s, dep)) { - return (err); - } - } - if (! changed) - return (0); - uniqtime32(&dbp->db_timestamp); - dbp->db_revision = MDDB_REV_DB; - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, KM_SLEEP); - create_db32rec(db32p, dbp); - crcgen(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL); - err = writeall(s, (caddr_t)db32p, db32p->db32_blknum, - 1, MDDB_WR_ONLY_MASTER); - kmem_free((caddr_t)db32p, MDDB_BSIZE); - return (err); -} - -static int -fixoptrecords( - mddb_set_t *s -) -{ - mddb_de_ic_t *dep; - mddb_db_t *dbp; - int err = 0; - set_t setno; - - /* - * In a MN diskset, the master node is the only node that runs - * fixoptrecords. If the master node changes anything, then the - * master node sends PARSE message to the slave nodes. The slave - * nodes will then re-read in the locator block or re-read in the - * directory blocks and re-write the optimized resync records. - */ - setno = s->s_setno; - if ((setno != MD_LOCAL_SET) && (s->s_lbp->lb_flags & MDDB_MNSET) && - (md_set[setno].s_am_i_master == 0)) { - return (0); - } - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if (! (dep->de_flags & MDDB_F_OPT)) - continue; - err = fixoptrecord(s, dep, dbp); - if (err != 0) - return (err); - } - } - return (0); -} - -/* - * Checks incore version of mddb data to mddb data ondisk. - * - * Returns: - * - 0 if the data was successfully read and is good. - * - MDDB_F_EREAD if a read error occurred. - * - 1 if the data read is bad (checksum failed, etc) - */ -static int -checkcopy -( - mddb_set_t *s, - int li -) -{ - mddb_db_t *dbp; - mddb_db32_t *cdb32p; - mddb_de_ic_t *dep; - mddb_de32_t *cde32p; - mddb_rb32_t *rbp, *crbp; - size_t size; - int i; - int retval = 1; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_de_t) == sizeof (mddb_de32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - if (s->s_databuffer_size == 0) { - size_t maxrecsize = MDDB_BSIZE; - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) - if (! (dep->de_flags & MDDB_F_OPT) && - dep->de_recsize > maxrecsize) - maxrecsize = dep->de_recsize; - - s->s_databuffer = (caddr_t)kmem_zalloc(maxrecsize, KM_SLEEP); - s->s_databuffer_size = maxrecsize; - } - - cdb32p = (mddb_db32_t *)s->s_databuffer; - - /* - * first go through and make sure all directory stuff - * is the same - */ - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - if (readblks(s, (caddr_t)cdb32p, dbp->db_blknum, 1, li)) { - retval = MDDB_F_EREAD; - goto err; - } - if (cdb32p->db32_magic != MDDB_MAGIC_DB) - goto err; - if (revchk(MDDB_REV_DB, cdb32p->db32_revision)) - goto err; - if (crcchk(cdb32p, &cdb32p->db32_checksum, MDDB_BSIZE, NULL)) - goto err; - if (cdb32p->db32_nextblk != dbp->db_nextblk) - goto err; - if (cdb32p->db32_recsum != dbp->db_recsum) - goto err; - if (cdb32p->db32_firstentry) { - cde32p = (mddb_de32_t *) - ((void *)((caddr_t)(&cdb32p->db32_firstentry) - + sizeof (cdb32p->db32_firstentry))); - } else - cde32p = NULL; - - dep = dbp->db_firstentry; - /* - * check if all directory entries are identical - */ - while (dep && cde32p) { - if (dep->de_recid != cde32p->de32_recid) - goto err; - if (dep->de_type1 != cde32p->de32_type1) - goto err; - if (dep->de_type2 != cde32p->de32_type2) - goto err; - if (dep->de_reqsize != cde32p->de32_reqsize) - goto err; - if (dep->de_flags != cde32p->de32_flags) - goto err; - - for (i = 0; i < 2; i++) { - if (dep->de_optinfo[i].o_li != - cde32p->de32_optinfo[i].o_li) - break; - } - if (i != 2) - goto err; - size = sizeof (mddb_block_t) * dep->de_blkcount; - if (bcmp((caddr_t)dep->de_blks, - (caddr_t)cde32p->de32_blks, size)) - goto err; - dep = dep->de_next; - if (cde32p->de32_next) - cde32p = nextentry(cde32p); - else - cde32p = NULL; - } - if (dep || cde32p) - goto err; - } - /* - * If here, all directories are functionally identical - * check to make sure all records are identical - * the reason the records are not just bcmped is that the - * lock flag does not want to be compared. - */ - crbp = (mddb_rb32_t *)cdb32p; - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if ((dep->de_flags & MDDB_F_OPT) || - (dep->de_flags & MDDB_F_CHANGELOG)) - continue; - rbp = (mddb_rb32_t *)dep->de_rb; - if (readblklst(s, (caddr_t)crbp, dep->de_blks, - dep->de_blkcount, li, 0)) { - retval = MDDB_F_EREAD; - goto err; - } - /* Check the crc for this record */ - if (rec_crcchk(s, dep, crbp)) - goto err; - - if (rbp->rb_checksum != crbp->rb_checksum || - rbp->rb_checksum_fiddle != crbp->rb_checksum_fiddle) - goto err; - } - } - return (0); -err: - return (retval); -} - -/* - * Determine if the location information for two mddbs is the same. - * The device slice and block offset should match. If both have devids then - * use that for the comparison, otherwise we compare the dev_ts. - * Comparing with the devid allows us to handle the case where a mddb was - * relocated to a dead mddbs dev_t. The live mddb will have the dev_t of - * the dead mddb but the devid comparison will catch this and not match. - * - * Return 1 if the location of the two mddbs match, 0 if not. - */ -static int -match_mddb(mddb_ri_t *rip, ddi_devid_t devid, char *minor, md_dev64_t dev, - daddr32_t blkno) -{ - if (rip->ri_flags & MDDB_F_EMASTER) { - /* - * If this element is errored then we don't try to match on it. - * If we try to match we could erroneously match on the dev_t - * of a relocated disk. - */ - return (0); - } - - if (rip->ri_devid && devid && minor) { - /* - * If old devid exists, then this is a replicated diskset - * and both old and new devids must be checked. - */ - if (rip->ri_old_devid) { - if (((ddi_devid_compare(rip->ri_devid, devid) != 0) && - (ddi_devid_compare(rip->ri_old_devid, - devid) != 0)) || - (strcmp(rip->ri_minor_name, minor) != 0)) - return (0); - } else { - if (ddi_devid_compare(rip->ri_devid, devid) != 0 || - strcmp(rip->ri_minor_name, minor) != 0) - return (0); - } - } else { - if (rip->ri_dev != dev) - return (0); - } - - if (rip->ri_blkno != blkno) - return (0); - - return (1); -} - -static int -ridev( - mddb_ri_t **rip, - mddb_cfg_loc_t *clp, - dev32_t *dev_2b_fixed, - int flag) -{ - mddb_ri_t *r, *r1; - md_dev64_t ldev, ndev; - major_t majordev; - int sz; - - if (MD_UPGRADE) { - ldev = md_makedevice(md_targ_name_to_major(clp->l_driver), - clp->l_mnum); - } else { - if (ddi_name_to_major(clp->l_driver) == (major_t)-1) - return (EINVAL); - - ldev = md_makedevice(ddi_name_to_major(clp->l_driver), - clp->l_mnum); - } - - if (clp->l_devid != 0) { - /* - * Get dev associated with device id and minor name. - * Setup correct driver name if dev is now different. - * Don't change driver name if during upgrade. - */ - ndev = ldev; - if (!mddb_devid_validate((ddi_devid_t)(uintptr_t)clp->l_devid, - &ndev, clp->l_minor_name)) { - if ((ndev != ldev) && (!(MD_UPGRADE))) { - majordev = md_getmajor(ndev); - (void) strcpy(clp->l_driver, - ddi_major_to_name(majordev)); - clp->l_mnum = md_getminor(ndev); - clp->l_devid_flags |= MDDB_DEVID_VALID; - ldev = ndev; - } - } else { - /* Mark as invalid */ - clp->l_devid_flags &= ~MDDB_DEVID_VALID; - } - } - - clp->l_dev = md_cmpldev(ldev); - if (dev_2b_fixed) - *dev_2b_fixed = clp->l_dev; - r = *rip; - - while (r) { - if (match_mddb(r, (ddi_devid_t)(uintptr_t)clp->l_devid, - clp->l_minor_name, ldev, clp->l_blkno)) { - if ((clp->l_devid != 0) && - !(clp->l_devid_flags & MDDB_DEVID_VALID)) { - r->ri_flags |= MDDB_F_EMASTER; - } else { - r->ri_flags |= flag; - } - return (0); /* already entered return success */ - } - r = r->ri_next; - } - - /* - * This replica not represented in the current rip list, - * so add it to the list. - */ - r = (mddb_ri_t *)kmem_zalloc(sizeof (**rip), KM_SLEEP); - r->ri_dev = ldev; - r->ri_blkno = clp->l_blkno; - (void) strncpy(r->ri_driver, clp->l_driver, MD_MAXDRVNM); - if (strlen(clp->l_driver) >= MD_MAXDRVNM) { - r->ri_driver[(MD_MAXDRVNM -1)] = '\0'; - } - if (clp->l_devname != NULL) { - (void) strcpy(r->ri_devname, clp->l_devname); - } - r->ri_flags |= flag; - if (clp->l_devid != 0) { - sz = clp->l_devid_sz; - r->ri_devid = (ddi_devid_t)kmem_zalloc(sz, KM_SLEEP); - bcopy((void *)(uintptr_t)clp->l_devid, (char *)r->ri_devid, sz); - - if (clp->l_old_devid != NULL) { - sz = clp->l_old_devid_sz; - r->ri_old_devid = (ddi_devid_t)kmem_zalloc(sz, - KM_SLEEP); - bcopy((char *)(uintptr_t)clp->l_old_devid, - (char *)r->ri_old_devid, sz); - } else { - r->ri_old_devid = 0; - } - if (strlen(clp->l_minor_name) < MDDB_MINOR_NAME_MAX) - (void) strcpy(r->ri_minor_name, clp->l_minor_name); - - if (!(clp->l_devid_flags & MDDB_DEVID_VALID)) { - /* - * Devid is present, but not valid. This could - * happen if device has been powered off or if - * the device has been removed. Mark the device in - * error. Don't allow any writes to this device - * based on the dev_t since another device could - * have been placed in its spot and be responding to - * the dev_t accesses. - */ - r->ri_flags |= MDDB_F_EMASTER; - } - } else { - r->ri_devid = 0; - r->ri_old_devid = 0; - } - - /* - * If the rip list is empty then this entry - * is the list. - */ - if (*rip == NULL) { - *rip = r; - return (0); - } - - /* - * Add this entry to the end of the rip list - */ - r1 = *rip; - while (r1->ri_next) - r1 = r1->ri_next; - r1->ri_next = r; - return (0); -} - -/* - * writecopy writes the incore data blocks out to all of the replicas. - * This is called from writestart - * - when a diskset is started or - * - when an error has been enountered during the write to a mddb. - * and from newdev when a new mddb is being added. - * - * flag can be 2 values: - * MDDB_WRITECOPY_ALL - write all records to all mddbs. This is - * always used for traditional and local disksets. - * For MN diskset: - * All nodes can call writecopy, but only the - * master node actually writes data to the disk - * except for optimized resync records. - * An optimized resync record can only be written to - * by the record owner. - * MDDB_WRITECOPY_SYNC - special case for MN diskset. When a new - * master has been chosen, the new master may need to - * write its incore mddb to disk (this is the case where the - * old master had executed a message but hadn't relayed it - * to this slave yet). New master should not write the - * change log records since new master would be overwriting - * valuable data. Only used during a reconfig cycle. - */ -static int -writecopy( - mddb_set_t *s, - int li, - int flag -) -{ - mddb_db_t *dbp; - mddb_db32_t *db32p; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - uint_t checksum; - int err = 0; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); -#endif - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, KM_SLEEP); - create_db32rec(db32p, dbp); - crcgen(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL); - err = writeblks(s, (caddr_t)db32p, dbp->db_blknum, 1, li, - MDDB_WR_ONLY_MASTER); - kmem_free((caddr_t)db32p, MDDB_BSIZE); - if (err) - return (err); - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - /* - * In a multinode diskset, when a new master is - * chosen the new master may need to write its - * incore copy of the mddb to disk. In this case, - * don't want to overwrite the change log records - * so new master sets flag to MDDB_WRITECOPY_SYNC. - */ - if (flag == MDDB_WRITECOPY_SYNC) { - if (dep->de_flags & MDDB_F_CHANGELOG) - continue; - } - /* - * In a multinode diskset, don't write out optimized - * resync resyncs since only the mirror owner node - * will have the correct data. If writecopy is - * being called from writestart as a result of - * an mddb failure, then writestart will handle - * the optimized records when it calls fixoptrecords. - */ - if ((MD_MNSET_SETNO(s->s_setno)) && - (dep->de_flags & MDDB_F_OPT)) { - continue; - } - - rbp = dep->de_rb; - checksum = rbp->rb_checksum_fiddle; - checksum ^= rbp->rb_checksum; - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - checksum ^= rbp->rb_checksum; - rbp->rb_checksum_fiddle = checksum; - if (err = wrtblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, (mddb_bf_t **)0, - MDDB_WR_ONLY_MASTER)) - return (err); - } - } - return (0); -} - -static int -upd_med( - mddb_set_t *s, - char *tag -) -{ - med_data_t meddb; - int medok; - mddb_lb_t *lbp = s->s_lbp; - set_t setno = s->s_setno; - int li; - int alc; - int lc; - - - /* If no mediator hosts, nothing to do */ - if (s->s_med.n_cnt == 0) - return (0); - - /* - * If this is a MN set and we are not the master, then don't - * update mediator hosts or mark mediator as golden since - * only master node should do that. - */ - if ((setno != MD_LOCAL_SET) && (s->s_lbp->lb_flags & MDDB_MNSET) && - (md_set[setno].s_am_i_master == 0)) { - return (0); - } - - bzero((char *)&meddb, sizeof (med_data_t)); - meddb.med_dat_mag = MED_DATA_MAGIC; - meddb.med_dat_rev = MED_DATA_REV; - meddb.med_dat_fl = 0; - meddb.med_dat_sn = setno; - meddb.med_dat_cc = lbp->lb_commitcnt; - TIMEVAL32_TO_TIMEVAL(&meddb.med_dat_id, &lbp->lb_ident.createtime); - crcgen(&meddb, &meddb.med_dat_cks, sizeof (med_data_t), NULL); - - /* count accessible mediators */ - medok = upd_med_hosts(&s->s_med, s->s_setname, &meddb, tag); - - /* count accessible and existing replicas */ - for (li = 0, alc = 0, lc = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if (lp->l_flags & MDDB_F_DELETED) - continue; - - lc++; - - if (! (lp->l_flags & MDDB_F_ACTIVE) || - (lp->l_flags & MDDB_F_EMASTER) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - alc++; - } - - /* - * Mediator update quorum is >= 50%: check for less than - * "mediator update" quorum. - */ - if ((medok * 2) < s->s_med.n_cnt) { - /* panic if <= 50% of all replicas are accessible */ - if ((lc > 0) && ((alc * 2) <= lc)) { - cmn_err(CE_PANIC, - "md: Update of 50%% of the mediator hosts failed"); - /* NOTREACHED */ - } - - cmn_err(CE_WARN, - "md: Update of 50%% of the mediator hosts failed"); - } - - /* - * If we have mediator update quorum and exactly 50% of the replicas - * are accessible then mark the mediator as golden. - */ - if (((medok * 2) >= (s->s_med.n_cnt + 1)) && (lc > 0) && - ((alc * 2) == lc)) { - meddb.med_dat_fl = MED_DFL_GOLDEN; - crcgen(&meddb, &meddb.med_dat_cks, sizeof (med_data_t), NULL); - (void) upd_med_hosts(&s->s_med, s->s_setname, &meddb, tag); - } - - return (0); -} - -static int -push_lb(mddb_set_t *s) -{ - mddb_lb_t *lbp = s->s_lbp; - - /* push the change to all the replicas */ - uniqtime32(&lbp->lb_timestamp); - if (MD_MNSET_SETNO(s->s_setno)) { - lbp->lb_revision = MDDB_REV_MNLB; - } else { - lbp->lb_revision = MDDB_REV_LB; - } - /* - * The updates to the mediator hosts are done - * by the callers of this function. - */ - return (writelocall(s)); -} - -/* Should not call for MN diskset since data tags are not supported */ -static int -dtl_cmp(const mddb_dtag_t *odtp, const mddb_dtag_t *ndtp) -{ - int diff = 0; - - diff = (int)(odtp->dt_setno - ndtp->dt_setno); - if (diff) - return (diff); - - diff = strncmp(odtp->dt_sn, ndtp->dt_sn, MDDB_SN_LEN); - if (diff) - return (diff); - - diff = strncmp(odtp->dt_hn, ndtp->dt_hn, MD_MAX_NODENAME_PLUS_1); - if (diff) - return (diff); - - /*CSTYLED*/ - return (timercmp(&odtp->dt_tv, &ndtp->dt_tv, !=)); -} - -/* Should not call for MN diskset since data tags are not supported */ -static int -dtl_addl(mddb_set_t *s, const mddb_dtag_t *ndtp) -{ - int nextid = 0; - mddb_dtag_lst_t **dtlpp = &s->s_dtlp; - - /* Run to the end of the list */ - for (/* void */; (*dtlpp != NULL); dtlpp = &(*dtlpp)->dtl_nx) { - if (dtl_cmp(&(*dtlpp)->dtl_dt, ndtp) == 0) - return (0); - nextid++; - } - - /* Add the new member */ - *dtlpp = kmem_zalloc(sizeof (**dtlpp), KM_SLEEP); - - /* Update the dtag portion of the list */ - bcopy((caddr_t)ndtp, (caddr_t)&((*dtlpp)->dtl_dt), - sizeof (mddb_dtag_t)); - - /* Fix up the id value */ - (*dtlpp)->dtl_dt.dt_id = ++nextid; - - return (0); -} - -/* - * Even though data tags are not supported in MN disksets, dt_cntl may - * be called for a MN diskset since this routine is called even before - * it is known the kind of diskset being read in from disk. - * For a MNdiskset, s_dtlp is 0 so a count of 0 is returned. - */ -static int -dtl_cntl(mddb_set_t *s) -{ - mddb_dtag_lst_t *dtlp = s->s_dtlp; - int ndt = 0; - - while (dtlp != NULL) { - ndt++; - dtlp = dtlp->dtl_nx; - } - - return (ndt); -} - -/* - * Even though data tags are not supported in MN disksets, dt_cntl may - * be called for a MN diskset since this routine is called even before - * it is known the kind of diskset being read in from disk. - * For a MNdiskset, s_dtlp is 0 so a 0 is returned. - */ -static mddb_dtag_t * -dtl_findl(mddb_set_t *s, int id) -{ - mddb_dtag_lst_t *dtlp = s->s_dtlp; - - while (dtlp != NULL) { - if (dtlp->dtl_dt.dt_id == id) - return (&dtlp->dtl_dt); - dtlp = dtlp->dtl_nx; - } - return ((mddb_dtag_t *)NULL); -} - -/* Should not call for MN diskset since data tags are not supported */ -static void -dtl_freel(mddb_dtag_lst_t **dtlpp) -{ - mddb_dtag_lst_t *dtlp; - mddb_dtag_lst_t *tdtlp; - - - for (tdtlp = *dtlpp; tdtlp != NULL; tdtlp = dtlp) { - dtlp = tdtlp->dtl_nx; - kmem_free(tdtlp, sizeof (mddb_dtag_lst_t)); - } - *dtlpp = (mddb_dtag_lst_t *)NULL; -} - -/* - * Even though data tags are not supported in MN disksets, dt_setup will - * be called for a MN diskset since this routine is called even before - * it is known the kind of diskset being read in from disk. - * Once this set is known as a MN diskset, the dtp area will be freed. - */ -static void -dt_setup(mddb_set_t *s, const mddb_dtag_t *dtagp) -{ - mddb_dt_t *dtp; - set_t setno = s->s_setno; - - - if (md_set[setno].s_dtp == (mddb_dt_t *)NULL) - md_set[setno].s_dtp = kmem_zalloc(MDDB_DT_BYTES, KM_SLEEP); - else if (dtagp == (mddb_dtag_t *)NULL) - bzero((caddr_t)md_set[setno].s_dtp, MDDB_DT_BYTES); - - /* shorthand */ - dtp = (mddb_dt_t *)md_set[setno].s_dtp; - - dtp->dt_mag = MDDB_MAGIC_DT; - dtp->dt_rev = MDDB_REV_DT; - - if (dtagp != NULL) - dtp->dt_dtag = *dtagp; /* structure assignment */ - - /* Initialize the setno */ - dtp->dt_dtag.dt_setno = setno; - - /* Clear the id and flags, this is only used in user land */ - dtp->dt_dtag.dt_id = 0; - - /* Checksum it */ - crcgen(dtp, &dtp->dt_cks, MDDB_DT_BYTES, NULL); -} - -/* Should not call for MN diskset since data tags are not supported */ -static int -set_dtag(mddb_set_t *s, md_error_t *ep) -{ - mddb_lb_t *lbp = s->s_lbp; - mddb_dtag_t tag; - - if (lbp->lb_dtblkcnt == 0) { - /* Data tags not used in a MN set - so no failure returned */ - if (lbp->lb_flags & MDDB_MNSET) - return (0); - - cmn_err(CE_WARN, - "No tag record allocated, unable to tag data"); - (void) mdmddberror(ep, MDE_DB_NOTAGREC, NODEV32, s->s_setno); - return (1); - } - - /* Clear the stack variable */ - bzero((caddr_t)&tag, sizeof (mddb_dtag_t)); - - /* Get the HW serial number for this host */ - (void) snprintf(tag.dt_sn, MDDB_SN_LEN, "%u", zone_get_hostid(NULL)); - tag.dt_sn[MDDB_SN_LEN - 1] = '\0'; - - /* Get the nodename that this host goes by */ - (void) strncpy(tag.dt_hn, utsname.nodename, MD_MAX_NODENAME); - tag.dt_hn[MD_MAX_NODENAME] = '\0'; - - /* Get a time stamp for NOW */ - uniqtime32(&tag.dt_tv); - - /* Setup the data tag record */ - dt_setup(s, &tag); - - /* Free any list of tags if they exist */ - dtl_freel(&s->s_dtlp); - - /* Put the new tag onto the tag list */ - (void) dtl_addl(s, &tag); - - return (0); -} - -/* - * If called during upgrade, this routine expects a non-translated - * (aka target) dev. - * Should not call for MN diskset since data tags are not supported. - */ -static int -dt_read(mddb_set_t *s, mddb_lb_t *lbp, mddb_ri_t *rip) -{ - int err = 0; - md_dev64_t dev; - caddr_t tbuf; - daddr_t physblk; - mddb_block_t blk; - mddb_dt_t *dtp; - mddb_dtag_t *dtagp; - set_t setno = s->s_setno; - - /* If have not allocated a data tag record, there is nothing to do */ - if (lbp->lb_dtblkcnt == 0) - return (1); - - dtp = rip->ri_dtp = (mddb_dt_t *)kmem_zalloc(MDDB_DT_BYTES, KM_SLEEP); - - if (dtp == (mddb_dt_t *)NULL) - return (1); - - /* shorthand */ - dev = md_xlate_targ_2_mini(rip->ri_dev); - if (dev == NODEV64) { - return (1); - } - - tbuf = (caddr_t)rip->ri_dtp; - - for (blk = 0; blk < lbp->lb_dtblkcnt; blk++) { - physblk = getphysblk((blk + lbp->lb_dtfirstblk), rip->ri_mbip); - err = getblks(s, tbuf, dev, physblk, btodb(MDDB_BSIZE), 0); - /* error reading the tag */ - if (err) { - err = 1; - goto out; - } - tbuf += MDDB_BSIZE; - } - - /* magic is valid? */ - if (dtp->dt_mag != MDDB_MAGIC_DT) { - err = 1; - goto out; - } - - /* revision is valid? */ - if (revchk(MDDB_REV_DT, dtp->dt_rev)) { - err = 1; - goto out; - } - - /* crc is valid? */ - if (crcchk(dtp, &dtp->dt_cks, MDDB_DT_BYTES, NULL)) { - err = 1; - goto out; - } - - /* shorthand */ - dtagp = &dtp->dt_dtag; - - /* set number match? */ - if (dtagp->dt_setno != setno) { - err = 1; - goto out; - } - - /* tag is not empty? */ - if (dtagp->dt_sn[0] == '\0' && dtagp->dt_hn[0] == '\0' && - (dtagp->dt_tv.tv_sec == 0 && dtagp->dt_tv.tv_usec == 0) && - dtagp->dt_id == 0) { - err = 2; - goto out; - } - - /* Mark the locator as having tagged data */ - rip->ri_flags |= MDDB_F_TAGDATA; - -out: - if (err) { - if (err == 1) { - md_set_setstatus(setno, MD_SET_BADTAG); - rip->ri_flags |= MDDB_F_BADTAG; - } - if (dtp != NULL) { - kmem_free(dtp, MDDB_DT_BYTES); - rip->ri_dtp = (mddb_dt_t *)NULL; - } - } - - return (err); -} - -/* Should not call for MN diskset since data tags are not supported */ -static int -dt_write(mddb_set_t *s) -{ - int li; - int err = 0; - int werr; - int empty_tag = 0; - mddb_dtag_t *dtagp; - mddb_dt_t *dtp; - mddb_lb_t *lbp = s->s_lbp; - set_t setno = s->s_setno; - uint_t set_status = md_get_setstatus(setno); - - - ASSERT(md_set[setno].s_dtp != NULL); - - /* Nowhere to write to */ - if (lbp->lb_dtblkcnt == 0) - return (err); - - if (set_status & MD_SET_BADTAG) - return (err); - - /* shorthand */ - dtp = (mddb_dt_t *)md_set[setno].s_dtp; - dtagp = &dtp->dt_dtag; - - /* See if the tag is empty. */ - if (dtagp->dt_sn[0] == '\0' && dtagp->dt_hn[0] == '\0' && - (dtagp->dt_tv.tv_sec == 0 && dtagp->dt_tv.tv_usec == 0) && - dtagp->dt_id == 0) - empty_tag = 1; - - /* Write the tag to the locators and reset appropriate flags. */ - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_DELETED) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - werr = writeblks(s, (caddr_t)dtp, lbp->lb_dtfirstblk, - MDDB_DT_BLOCKS, li, MDDB_WR_ONLY_MASTER); - - if (werr) { - err |= werr; - continue; - } - - if (empty_tag) - lp->l_flags &= ~(MDDB_F_BADTAG | MDDB_F_TAGDATA); - else { - lp->l_flags |= MDDB_F_TAGDATA; - lp->l_flags &= ~MDDB_F_BADTAG; - } - } - - if (err) - return (err); - - - /* If the tags were written, check to see if any tags remain. */ - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_DELETED) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - if (lp->l_flags & MDDB_F_TAGDATA) - break; - } - - /* If there are no tags, then clear CLRTAG and TAGDATA */ - if (li == lbp->lb_loccnt) { - md_clr_setstatus(setno, MD_SET_CLRTAG); - md_clr_setstatus(setno, MD_SET_TAGDATA); - } - - return (err); -} - -/* Should not call for MN diskset since data tags are not supported */ -static int -dt_alloc_if_needed(mddb_set_t *s) -{ - int i; - int li; - int moveit = 0; - mddb_lb_t *lbp = s->s_lbp; - mddb_block_t blkcnt = lbp->lb_dtblkcnt; - set_t setno = s->s_setno; - uint_t set_status = md_get_setstatus(setno); - - /* - * If the data tag record is allocated (blkcnt != 0) and a bad tag was - * not detected, there is nothing to do. - */ - if (blkcnt != 0 && ! (set_status & MD_SET_BADTAG)) - return (0); - - /* Bitmap not setup, checks can't be done */ - if (s->s_totalblkcnt == 0) - return (0); - - /* While reading the tag(s) an invalid tag data record was seen */ - if (set_status & MD_SET_BADTAG) - /* See if the invalid tag needs to be moved */ - for (i = 0; i < MDDB_DT_BLOCKS; i++) - if (blkcheck(s, (i + lbp->lb_dtfirstblk))) { - moveit = 1; - break; - } - - /* Need to move or allocate the tag data record */ - if (moveit || blkcnt == 0) { - lbp->lb_dtfirstblk = getfreeblks(s, MDDB_DT_BLOCKS); - if (lbp->lb_dtfirstblk == 0) { - cmn_err(CE_WARN, - "Unable to allocate data tag record"); - return (0); - } - lbp->lb_dtblkcnt = MDDB_DT_BLOCKS; - - /* Mark the locators so that they get written to disk. */ - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_DELETED) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - - lp->l_flags |= MDDB_F_BADTAG; - } - return (1); - } - - /* - * Make sure the blocks are owned, since the calculation in - * computefreeblks() is bypassed when MD_SET_BADTAG is set. - */ - for (i = 0; i < MDDB_DT_BLOCKS; i++) - blkbusy(s, (i + lbp->lb_dtfirstblk)); - - return (1); -} - -/* - * Writestart writes the incore mddb out to all of the replicas. - * This is called when a diskset is started and when an error has - * been enountered during the write to a mddb. - * - * flag can be 2 values: - * MDDB_WRITECOPY_ALL - write all records to all mddbs. This is - * always used for traditional and local disksets. - * This is the normal path for MN disksets since the slave - * nodes aren't actually allowed to write to disk. - * MDDB_WRITECOPY_SYNC - special case for MN diskset. When a new - * master has been chosen, the new master may need to - * write its incore mddb to disk (this is the case where the - * old master had executed a message but hadn't relayed it - * to this slave yet). New master should not write the - * change log records since new master would be overwriting - * valuable data. Only used during a reconfig cycle. - */ -static int -writestart( - mddb_set_t *s, - int flag -) -{ - int li; - mddb_locator_t *lp; - mddb_lb_t *lbp; - mddb_ln_t *lnp; - int err = 0; - uint_t set_status; - - lbp = s->s_lbp; - - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (! (lp->l_flags & MDDB_F_SUSPECT)) - continue; - if (writecopy(s, li, flag)) - return (1); - lp->l_flags |= MDDB_F_UP2DATE; - } - - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if ((lp->l_flags & MDDB_F_UP2DATE)) - continue; - if (checkcopy(s, li)) - if (err = writecopy(s, li, flag)) - return (1); - lp->l_flags |= MDDB_F_UP2DATE; - } - - /* - * Call fixoptrecord even during a reconfig cycle since a replica - * failure may force the master to re-assign the optimized - * resync record to another replica. - */ - if (fixoptrecords(s)) - return (1); - - set_status = md_get_setstatus(s->s_setno); - - /* See if any (ACTIVE and not OLDACT) or (not ACTIVE and OLDACT) */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - - if (lp->l_flags & MDDB_F_DELETED) - continue; - - if (((lp->l_flags & MDDB_F_ACTIVE) != 0 && - (lp->l_flags & MDDB_F_OLDACT) == 0) || - ((lp->l_flags & MDDB_F_ACTIVE) == 0 && - (lp->l_flags & MDDB_F_OLDACT) != 0)) - break; - - if ((set_status & MD_SET_TAGDATA) || - (set_status & MD_SET_CLRTAG)) - if ((lp->l_flags & MDDB_F_TAGDATA) || - (lp->l_flags & MDDB_F_BADTAG)) - break; - } - - /* - * If we found (ACTIVE and not OLDACT) or (not ACTIVE and OLDACT) - * the lbp identifier and the set identifier doesn't match. - */ - if (li != lbp->lb_loccnt || cmpidentifier(s, &lbp->lb_ident)) { - - /* Only call for traditional and local sets */ - if (!(lbp->lb_flags & MDDB_MNSET)) - (void) dt_write(s); - - setidentifier(s, &lbp->lb_ident); - - if (err = push_lb(s)) { - (void) upd_med(s, "writestart(0)"); - return (err); - } - - (void) upd_med(s, "writestart(0)"); - - if (err = push_lb(s)) { - (void) upd_med(s, "writestart(1)"); - return (err); - } - - (void) upd_med(s, "writestart(1)"); - - lnp = s->s_lnp; - uniqtime32(&lnp->ln_timestamp); - if (lbp->lb_flags & MDDB_MNSET) - lnp->ln_revision = MDDB_REV_MNLN; - else - lnp->ln_revision = MDDB_REV_LN; - crcgen(lnp, &lnp->ln_checksum, dbtob(lbp->lb_lnblkcnt), NULL); - err = writeall(s, (caddr_t)lnp, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, 0); - /* - * If a MN diskset and this is the master, set the PARSE_LOCNM - * flag in the mddb_set structure to show that the locator - * names have changed. - * Don't set parseflags as a result of a new master sync - * during reconfig cycle since slaves nodes are already - * in-sync with the new master. - */ - - if ((lbp->lb_flags & MDDB_MNSET) && - (md_set[s->s_setno].s_am_i_master) && - (flag != MDDB_WRITECOPY_SYNC)) { - s->s_mn_parseflags |= MDDB_PARSE_LOCNM; - } - - if (err) - return (err); - } - - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (lp->l_flags & MDDB_F_ACTIVE) { - lp->l_flags |= MDDB_F_OLDACT; - } else { - lp->l_flags &= ~MDDB_F_OLDACT; - } - } - - md_clr_setstatus(s->s_setno, MD_SET_STALE); - - return (0); -} - -/* - * selectreplicas selects the working replicas and may write the incore - * version of the mddb out to the replicas ondisk. - * - * flag can be 3 values: - * MDDB_RETRYSCAN - quick scan to see if there is an error. - * If no new error, returns without writing mddb - * to disks. If a new error is seen, writes out - * mddb to disks. - * MDDB_SCANALL - lengthy scan to check out mddbs and always writes - * out mddb to the replica ondisk. Calls writecopy - * with MDDB_WRITECOPY_ALL flag which writes out - * all records to the replicas ondisk. - * MDDB_SCANALLSYNC - called during reconfig cycle to sync up incore - * and ondisk mddbs by writing incore values to disk. - * Calls writecopy with MDDB_WRITECOPY_SYNC flag so - * that change log records are not written out. - * Only used by MN disksets. - * - * Returns: - * 0 - Successful - * 1 - Unable to write incore mddb data to disk since < 50% replicas. - */ -int -selectreplicas( - mddb_set_t *s, - int flag -) -{ - int li; - int alc; - int lc; - mddb_locator_t *lp; - mddb_lb_t *lbp = s->s_lbp; - set_t setno = s->s_setno; - int wc_flag; - - /* - * can never transition from stale to not stale - */ - if (md_get_setstatus(setno) & MD_SET_STALE) { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (! (lp->l_flags & MDDB_F_EMASTER)) { - lp->l_flags |= MDDB_F_ACTIVE; - } else { - lp->l_flags &= ~MDDB_F_ACTIVE; - } - } - return (1); - } - - if ((flag == MDDB_SCANALL) || (flag == MDDB_SCANALLSYNC)) { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (lp->l_flags & MDDB_F_ACTIVE) { - lp->l_flags |= MDDB_F_OLDACT; - lp->l_flags &= ~MDDB_F_SUSPECT; - } else { - lp->l_flags |= MDDB_F_SUSPECT; - lp->l_flags &= ~MDDB_F_OLDACT; - } - - if (! (lp->l_flags & MDDB_F_EMASTER)) { - lp->l_flags |= MDDB_F_ACTIVE; - lp->l_flags &= ~MDDB_F_EWRITE; - lp->l_flags &= ~MDDB_F_TOOSMALL; - } else { - lp->l_flags &= ~MDDB_F_ACTIVE; - } - } - computefreeblks(s); /* set up free block bits */ - } else { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (lp->l_flags & MDDB_F_EWRITE) - break; - } - - /* - * if there are no errors this is error has already - * been processed return current state - */ - if (li == lbp->lb_loccnt) - return (md_get_setstatus(setno) & MD_SET_TOOFEW); - - lp->l_flags &= ~MDDB_F_ACTIVE; - do { - lp = &lbp->lb_locators[li]; - lp->l_flags &= ~MDDB_F_UP2DATE; - } while (++li < lbp->lb_loccnt); - } - - alc = 0; - lc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - lc++; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - alc++; - } - - if (alc < ((lc + 1) / 2)) { - md_set_setstatus(setno, MD_SET_TOOFEW); - return (1); - } - - /* Set wc_flag based on flag passed in. */ - if (flag == MDDB_SCANALLSYNC) - wc_flag = MDDB_WRITECOPY_SYNC; - else - wc_flag = MDDB_WRITECOPY_ALL; - - do { - if (! writestart(s, wc_flag)) { - md_clr_setstatus(setno, MD_SET_TOOFEW); - return (0); - } - alc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if ((lp->l_flags & MDDB_F_DELETED) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - if (lp->l_flags & MDDB_F_EWRITE) { - lp->l_flags &= ~MDDB_F_ACTIVE; - lp->l_flags &= ~MDDB_F_UP2DATE; - continue; - } - alc++; - } - } while (alc >= ((lc + 1) / 2)); - md_set_setstatus(setno, MD_SET_TOOFEW); - return (1); -} - -static int -checkstate( - mddb_set_t *s, - int probe -) -{ - int error; - uint_t set_status = md_get_setstatus(s->s_setno); - - ASSERT(s != NULL); - - if (! (set_status & MD_SET_STALE) && ! (set_status & MD_SET_TOOFEW)) - return (0); - - if (probe == MDDB_NOPROBE) - return (1); - - single_thread_start(s); - error = selectreplicas(s, MDDB_SCANALL); - single_thread_end(s); - - if (error == 0 && s->s_zombie != 0) { - mutex_exit(SETMUTEX(s->s_setno)); - error = mddb_deleterec(s->s_zombie); - mutex_enter(SETMUTEX(s->s_setno)); - if (error == 0) - s->s_zombie = 0; - } - return (error); -} - -static int -writeretry( - mddb_set_t *s -) -{ - if (selectreplicas(s, MDDB_RETRYSCAN)) - if (selectreplicas(s, MDDB_SCANALL)) - return (1); - return (0); -} - -static void -free_mbipp(mddb_mb_ic_t **mbipp) -{ - mddb_mb_ic_t *mbip1, *mbip2; - - for (mbip1 = *mbipp; mbip1 != NULL; mbip1 = mbip2) { - mbip2 = mbip1->mbi_next; - kmem_free((caddr_t)mbip1, MDDB_IC_BSIZE); - } - *mbipp = (mddb_mb_ic_t *)NULL; -} - -static mddb_ri_t * -save_rip(mddb_set_t *s) -{ - mddb_ri_t *trip = s->s_rip; - mddb_ri_t *nrip = NULL; - mddb_ri_t **nripp = &nrip; - mddb_ri_t *rip; - - while (trip) { - /* Run to the end of the list */ - for (/* void */; (*nripp != NULL); nripp = &(*nripp)->ri_next) - /* void */; - - /* Add the new member */ - *nripp = kmem_zalloc(sizeof (**nripp), KM_SLEEP); - - ASSERT(*nripp != NULL); - - /* shorthand */ - rip = *nripp; - - *rip = *trip; /* structure assignment */ - - /* Clear the stuff that is not needed for hints */ - rip->ri_flags = 0; - rip->ri_commitcnt = 0; - rip->ri_transplant = 0; - rip->ri_mbip = (mddb_mb_ic_t *)NULL; - rip->ri_dtp = (mddb_dt_t *)NULL; - rip->ri_lbp = (mddb_lb_t *)NULL; - rip->ri_did_icp = (mddb_did_ic_t *)NULL; - rip->ri_devid = (ddi_devid_t)NULL; - rip->ri_old_devid = (ddi_devid_t)NULL; - rip->ri_next = (mddb_ri_t *)NULL; - - trip = trip->ri_next; - } - return (nrip); -} - -static void -free_rip(mddb_ri_t **ripp) -{ - mddb_ri_t *rip; - mddb_ri_t *arip; - - for (rip = *ripp; rip != (mddb_ri_t *)NULL; rip = arip) { - arip = rip->ri_next; - if (rip->ri_devid != (ddi_devid_t)NULL) { - ddi_devid_free(rip->ri_devid); - rip->ri_devid = (ddi_devid_t)NULL; - } - if (rip->ri_old_devid != (ddi_devid_t)NULL) { - ddi_devid_free(rip->ri_old_devid); - rip->ri_old_devid = (ddi_devid_t)NULL; - } - kmem_free((caddr_t)rip, sizeof (*rip)); - } - *ripp = (mddb_ri_t *)NULL; -} - -/* - * this routine selects the correct replica to use - * the rules are as follows - * 1. if all replica has same init time select highest commit count - * 2. if some but not all replicas are from another hostid discard - * them. - * 3. find which init time is present is most replicas - * 4. discard all replicas which do not match most init times - * 5. select replica with highest commit count - */ - -static mddb_lb_t * -selectlocator( - mddb_set_t *s -) -{ - mddb_ri_t *rip = s->s_rip; - mddb_ri_t *r, *r1; - mddb_lb_t *lbp; - struct timeval32 *tp = (struct timeval32 *)NULL; - int different; - int same; - int count; - int maxcount; - set_t setno = s->s_setno; - size_t sz; - int mn_set = 0; - - /* Clear the ri_transplant flag on all the rip entries. */ - /* Set ri_commitcnt to locator's commitcnt - if available */ - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - r->ri_transplant = 0; - if (r->ri_lbp != (mddb_lb_t *)NULL) { - r->ri_commitcnt = r->ri_lbp->lb_commitcnt; - /* If any locators have MN bit set, set flag */ - if (r->ri_lbp->lb_flags & MDDB_MNSET) - mn_set = 1; - } - } - - /* - * A data tag is being used, so use it to limit the selection first. - * Data tags not used in MN diskset. - */ - if ((mn_set == 0) && (md_get_setstatus(setno) & MD_SET_USETAG)) { - mddb_dt_t *dtp = (mddb_dt_t *)md_set[setno].s_dtp; - - /* - * now toss any locators that have a different data tag - */ - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - - if (r->ri_dtp != (mddb_dt_t *)NULL) { - /* If same tag, keep it */ - if (dtl_cmp(&dtp->dt_dtag, - &r->ri_dtp->dt_dtag) == 0) - continue; - } - - if (r->ri_dtp != (mddb_dt_t *)NULL) { - kmem_free((caddr_t)r->ri_dtp, MDDB_DT_BYTES); - r->ri_dtp = (mddb_dt_t *)NULL; - } - - mddb_devid_icp_free(&r->ri_did_icp, r->ri_lbp); - if (!(md_get_setstatus(setno) & - MD_SET_REPLICATED_IMPORT)) { - if (r->ri_old_devid != (ddi_devid_t)NULL) { - sz = ddi_devid_sizeof(r->ri_old_devid); - kmem_free((caddr_t)r->ri_old_devid, sz); - r->ri_old_devid = (ddi_devid_t)NULL; - } - } - - kmem_free((caddr_t)r->ri_lbp, - dbtob(r->ri_lbp->lb_blkcnt)); - r->ri_lbp = (mddb_lb_t *)NULL; - - r->ri_transplant = 1; - } - - /* Tag used, clear the bit */ - md_clr_setstatus(s->s_setno, MD_SET_USETAG); - - if (md_get_setstatus(s->s_setno) & MD_SET_TAGDATA) { - /* - * Get rid of the list of tags. - */ - dtl_freel(&s->s_dtlp); - - /* - * Re-create the list with the tag used. - */ - (void) dtl_addl(s, &dtp->dt_dtag); - } - } - - /* - * scan to see if all replicas have same time - */ - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - if (tp == NULL) { - tp = &r->ri_lbp->lb_inittime; - continue; - } - /* CSTYLED */ - if (timercmp(tp, &r->ri_lbp->lb_inittime, !=)) - break; - } - - /* - * if r == NULL then they were all them same. Choose highest - * commit count - */ - if (r == (mddb_ri_t *)NULL) - goto out; - - /* - * If here, a bogus replica is present and at least 1 lb_inittime - * did not match. - */ - - /* - * look and see if any but not all are from different id - */ - - different = 0; - same = 0; - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - if (cmpidentifier(s, &r->ri_lbp->lb_ident)) - different = 1; - else - same = 1; - } - - /* - * now go through and throw out different if there are some - * that are the same - */ - if (different != 0 && same != 0) { - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - - if (!cmpidentifier(s, &r->ri_lbp->lb_ident)) - continue; - - if (r->ri_dtp != (mddb_dt_t *)NULL) { - kmem_free((caddr_t)r->ri_dtp, MDDB_DT_BYTES); - r->ri_dtp = (mddb_dt_t *)NULL; - } - - mddb_devid_icp_free(&r->ri_did_icp, r->ri_lbp); - if (!(md_get_setstatus(setno) & - MD_SET_REPLICATED_IMPORT)) { - if (r->ri_old_devid != (ddi_devid_t)NULL) { - sz = ddi_devid_sizeof(r->ri_old_devid); - kmem_free((caddr_t)r->ri_old_devid, sz); - r->ri_old_devid = (ddi_devid_t)NULL; - } - } - - kmem_free((caddr_t)r->ri_lbp, - dbtob(r->ri_lbp->lb_blkcnt)); - r->ri_lbp = (mddb_lb_t *)NULL; - - r->ri_transplant = 1; - } - } - - /* - * go through and pick highest. Use n square because it is - * simple and 40 some is max possible - */ - maxcount = 0; - lbp = (mddb_lb_t *)NULL; - for (r1 = rip; r1 != (mddb_ri_t *)NULL; r1 = r1->ri_next) { - if (r1->ri_lbp == (mddb_lb_t *)NULL) - continue; - count = 0; - for (r = r1; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - if (timercmp(&r1->ri_lbp->lb_inittime, /* CSTYLED */ - &r->ri_lbp->lb_inittime, ==)) - count++; - } - if (count > maxcount) { - maxcount = count; - lbp = r1->ri_lbp; - } - } - - /* - * now go though and toss any that are of a different time stamp - */ - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - if (timercmp(&lbp->lb_inittime, /* CSTYLED */ - &r->ri_lbp->lb_inittime, ==)) - continue; - - if (r->ri_dtp != (mddb_dt_t *)NULL) { - kmem_free((caddr_t)r->ri_dtp, MDDB_DT_BYTES); - r->ri_dtp = (mddb_dt_t *)NULL; - } - - mddb_devid_icp_free(&r->ri_did_icp, r->ri_lbp); - if (!(md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) { - if (r->ri_old_devid != (ddi_devid_t)NULL) { - sz = ddi_devid_sizeof(r->ri_old_devid); - kmem_free((caddr_t)r->ri_old_devid, sz); - r->ri_old_devid = (ddi_devid_t)NULL; - } - } - - kmem_free((caddr_t)r->ri_lbp, dbtob(r->ri_lbp->lb_blkcnt)); - r->ri_lbp = (mddb_lb_t *)NULL; - - r->ri_transplant = 1; - } - -out: - /* - * Find the locator with the highest commit count, and make it the - * "chosen" one. - */ - lbp = (mddb_lb_t *)NULL; - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - - if (lbp == NULL) { - lbp = r->ri_lbp; - continue; - } - - if (r->ri_lbp->lb_commitcnt > lbp->lb_commitcnt) - lbp = r->ri_lbp; - } - - /* Toss all locator blocks, except the "chosen" one. */ - for (r = rip; r != (mddb_ri_t *)NULL; r = r->ri_next) { - if (r->ri_lbp == (mddb_lb_t *)NULL) - continue; - - /* Get rid of all dtp's */ - if (r->ri_dtp != (mddb_dt_t *)NULL) { - kmem_free((caddr_t)r->ri_dtp, MDDB_DT_BYTES); - r->ri_dtp = (mddb_dt_t *)NULL; - } - - if (r->ri_lbp == lbp) - continue; - - /* Get rid of extra locator devid block info */ - mddb_devid_icp_free(&r->ri_did_icp, r->ri_lbp); - if (!(md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) { - if (r->ri_old_devid != (ddi_devid_t)NULL) { - sz = ddi_devid_sizeof(r->ri_old_devid); - kmem_free((caddr_t)r->ri_old_devid, sz); - r->ri_old_devid = (ddi_devid_t)NULL; - } - } - - /* Get rid of extra locators */ - kmem_free((caddr_t)r->ri_lbp, dbtob(r->ri_lbp->lb_blkcnt)); - r->ri_lbp = (mddb_lb_t *)NULL; - } - return (lbp); -} - -static void -locator2cfgloc( - mddb_lb_t *lbp, - mddb_cfg_loc_t *clp, - int li, - side_t sideno, - mddb_did_ic_t *did_icp -) -{ - mddb_drvnm_t *dn; - mddb_locator_t *lp = &lbp->lb_locators[li]; - mddb_sidelocator_t *slp; - mddb_mnsidelocator_t *mnslp; - mddb_did_info_t *did_info; - int i, sz, szalloc; - int mn_set = 0; - mddb_mnlb_t *mnlbp; - - if (lbp->lb_flags & MDDB_MNSET) { - mn_set = 1; - mnlbp = (mddb_mnlb_t *)lbp; - for (i = 0; i < MD_MNMAXSIDES; i++) { - mnslp = &mnlbp->lb_mnsidelocators[i][li]; - if (mnslp->mnl_sideno == sideno) - break; - } - if (i == MD_MNMAXSIDES) - return; - } else { - slp = &lbp->lb_sidelocators[sideno][li]; - } - - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - did_info = &(did_icp->did_ic_blkp->blk_info[li]); - if (did_info->info_flags & MDDB_DID_EXISTS) { - sz = (int)ddi_devid_sizeof(did_icp->did_ic_devid[li]); - if (clp->l_devid_flags & MDDB_DEVID_SPACE) { - /* - * copy device id from mddb to - * cfg_loc structure - */ - szalloc = clp->l_devid_sz; - if (sz <= szalloc) { - for (i = 0; i < sz; i++) { - ((char *)(uintptr_t) - clp->l_devid)[i] = - ((char *)did_icp-> - did_ic_devid[li])[i]; - } - clp->l_devid_flags |= MDDB_DEVID_VALID; - (void) strcpy(clp->l_minor_name, - did_info->info_minor_name); - } else { - clp->l_devid_flags |= - MDDB_DEVID_NOSPACE; - } - } else if (clp->l_devid_flags & MDDB_DEVID_GETSZ) { - clp->l_devid_flags = MDDB_DEVID_SZ; - clp->l_devid_sz = sz; - } - } - } - - /* - * Even if a devid exists, use the dev, drvnm and mnum in the locators - * and sidelocators. During startup, the dev, drvnm and mnum in - * these structures may not match the devid (the locators and - * sidelocators will be updated to match the devid by the routine - * load_old_replicas). Using out-of-sync values won't cause any - * problems since ridev will re-derive these from the devid and mnum. - * After startup, the dev, drvnm and mnum in these structures have - * been updated and can be used. - */ - - clp->l_blkno = lp->l_blkno; - clp->l_flags = lp->l_flags; - clp->l_dev = lp->l_dev; - - if (mn_set) { - dn = &lbp->lb_drvnm[mnslp->mnl_drvnm_index]; - clp->l_mnum = mnslp->mnl_mnum; - } else { - dn = &lbp->lb_drvnm[slp->l_drvnm_index]; - clp->l_mnum = slp->l_mnum; - } - (void) strncpy(clp->l_driver, dn->dn_data, MD_MAXDRVNM); -} - -/* - * Find the index into the mnsidelocator where entry will go. - * Then index can be fed into both splitname2locatorblocks and - * cfgloc2locator so that those entries can be kept in sync. - * - * Returns: - * -1 if failed to find unused slot or if a traditional diskset - * index, if successful (0 <= index <= MD_MNMAXSIDES) - */ -static int -checklocator( - mddb_lb_t *lbp, - int li, - side_t sideno -) -{ - uchar_t i; - mddb_mnsidelocator_t *mnslp; - mddb_mnlb_t *mnlbp; - int index = -1; - - if (lbp->lb_flags & MDDB_MNSET) { - /* - * Checking side locator structure. First, check if - * there is already an entry for this side. If so, - * then use that entry. Otherwise, find an entry - * that has a sideno of 0. - */ - mnlbp = (mddb_mnlb_t *)lbp; - for (i = 0; i < MD_MNMAXSIDES; i++) { - mnslp = &mnlbp->lb_mnsidelocators[i][li]; - if (mnslp->mnl_sideno == sideno) { - /* Found a match - stop looking */ - index = i; - break; - } else if ((mnslp->mnl_sideno == 0) && (index == -1)) { - /* Set first empty slot, but keep looking */ - index = i; - } - } - /* Didn't find empty slot or previously used slot */ - if ((i == MD_MNMAXSIDES) && (index == -1)) { - return (-1); - } - return (index); - } else - return (0); -} - -/* - * Takes locator information (driver name, minor number, sideno) and - * stores it in the locator block. - * For traditional diskset, the sideno is the index into the sidelocator - * array in the locator block. - * For the MN diskset, the sideno is the nodeid which can be any number, - * so the index passed in is the index into the mnsidelocator array - * in the locator block. - */ -static int -cfgloc2locator( - mddb_lb_t *lbp, - mddb_cfg_loc_t *clp, - int li, - side_t sideno, - int index /* Only useful in MNsets when > 1 */ -) -{ - uchar_t i; - mddb_sidelocator_t *slp; - mddb_mnsidelocator_t *mnslp; - mddb_set_t *s; - int mn_set = 0; - mddb_mnlb_t *mnlbp; - - if (lbp->lb_flags & MDDB_MNSET) { - mnlbp = (mddb_mnlb_t *)lbp; - mn_set = 1; - /* - * Index will be the slot that has the given sideno or - * the first empty slot if no match is found. - * This was pre-checked out in check locator. - */ - mnslp = &mnlbp->lb_mnsidelocators[index][li]; - } else { - slp = &lbp->lb_sidelocators[sideno][li]; - } - - /* - * Look for the driver name - */ - for (i = 0; i < MDDB_DRVNMCNT; i++) { - if (lbp->lb_drvnm[i].dn_len == 0) - continue; - if (strncmp(lbp->lb_drvnm[i].dn_data, clp->l_driver, - MD_MAXDRVNM) == 0) - break; - } - - /* - * Didn't find one, add a new one - */ - if (i == MDDB_DRVNMCNT) { - for (i = 0; i < MDDB_DRVNMCNT; i++) { - if (lbp->lb_drvnm[i].dn_len == 0) - break; - } - if (i == MDDB_DRVNMCNT) - return (1); - (void) strncpy(lbp->lb_drvnm[i].dn_data, clp->l_driver, - MD_MAXDRVNM); - lbp->lb_drvnm[i].dn_len = (uchar_t)strlen(clp->l_driver); - } - - /* Fill in the drvnm index */ - if (mn_set) { - mnslp->mnl_drvnm_index = i; - mnslp->mnl_mnum = clp->l_mnum; - mnslp->mnl_sideno = sideno; - } else { - slp->l_drvnm_index = i; - slp->l_mnum = clp->l_mnum; - } - - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - /* - * This device id could already be associated with this index - * if this is not the first side added to the set. - * If device id is 0, there is no device id for this device. - */ - if ((ddi_devid_t)(uintptr_t)clp->l_devid == 0) - return (0); - s = (mddb_set_t *)md_set[lbp->lb_setno].s_db; - if (mddb_devid_add(s, li, (ddi_devid_t)(uintptr_t)clp->l_devid, - clp->l_minor_name)) { - return (1); - } - } - - return (0); -} - -/* - * See if there are mediator hosts and try to use the data. - */ -static int -mediate( - mddb_set_t *s -) -{ - mddb_lb_t *lbp = s->s_lbp; - med_data_lst_t *meddlp = NULL; - med_data_lst_t *tmeddlp = NULL; - med_data_t *meddp; - int medok = 0; - int medacc = 0; - uint_t maxcc; - int golden = 0; - int err = 1; - set_t setno = s->s_setno; - - /* Do not have a mediator, then the state is stale */ - if (s->s_med.n_cnt == 0) - return (err); - - /* Contact the mediator hosts for the data */ - meddlp = get_med_host_data(&s->s_med, s->s_setname, setno); - - /* No mediator data, stale */ - if (meddlp == NULL) - return (err); - - /* Mark all the mediator data that is not for this set as errored */ - for (tmeddlp = meddlp; tmeddlp != NULL; tmeddlp = tmeddlp->mdl_nx) { - struct timeval32 tmptime; - meddp = tmeddlp->mdl_med; - - /* Count the number of mediators contacted */ - medacc++; - - /* Paranoid check */ - if (meddp->med_dat_sn != setno) - meddp->med_dat_fl |= MED_DFL_ERROR; - - TIMEVAL_TO_TIMEVAL32(&tmptime, &meddp->med_dat_id); - - /*CSTYLED*/ - if (timercmp(&tmptime, &lbp->lb_ident.createtime, !=)) - meddp->med_dat_fl |= MED_DFL_ERROR; - } - - /* Get the max commitcount */ - maxcc = 0; - for (tmeddlp = meddlp; tmeddlp != NULL; tmeddlp = tmeddlp->mdl_nx) { - meddp = tmeddlp->mdl_med; - if (meddp->med_dat_fl & MED_DFL_ERROR) - continue; - if (meddp->med_dat_cc > maxcc) - maxcc = meddp->med_dat_cc; - } - - /* Now mark the records that don't have the highest cc as errored */ - for (tmeddlp = meddlp; tmeddlp != NULL; tmeddlp = tmeddlp->mdl_nx) { - meddp = tmeddlp->mdl_med; - if (meddp->med_dat_fl & MED_DFL_ERROR) - continue; - if (meddp->med_dat_cc != maxcc) - meddp->med_dat_fl |= MED_DFL_ERROR; - } - - /* Now mark the records that don't match the lb commitcnt as errored */ - for (tmeddlp = meddlp; tmeddlp != NULL; tmeddlp = tmeddlp->mdl_nx) { - meddp = tmeddlp->mdl_med; - if (meddp->med_dat_fl & MED_DFL_ERROR) - continue; - if (meddp->med_dat_cc != lbp->lb_commitcnt) - meddp->med_dat_fl |= MED_DFL_ERROR; - } - - /* Is there a "golden" copy and how many valid mediators */ - for (tmeddlp = meddlp; tmeddlp != NULL; tmeddlp = tmeddlp->mdl_nx) { - meddp = tmeddlp->mdl_med; - if (meddp->med_dat_fl & MED_DFL_ERROR) - continue; - - if (meddp->med_dat_fl & MED_DFL_GOLDEN) - golden++; - - medok++; - } - - /* No survivors, stale */ - if (medok == 0) - goto out; - - /* No mediator quorum and no golden copies, stale */ - if (medacc < ((s->s_med.n_cnt / 2) + 1) && ! golden) { - /* Skip odd numbers, no exact 50% */ - if (s->s_med.n_cnt & 1) - goto out; - /* Have 50%, allow an accept */ - if (medacc == (s->s_med.n_cnt / 2)) - md_set_setstatus(setno, MD_SET_ACCOK); - goto out; - } - - /* We either have a quorum or a golden copy, or both */ - err = 0; - -out: - if (meddlp) { - for (/* void */; meddlp != NULL; meddlp = tmeddlp) { - tmeddlp = meddlp->mdl_nx; - kmem_free(meddlp->mdl_med, sizeof (med_data_t)); - kmem_free(meddlp, sizeof (med_data_lst_t)); - } - } - - return (err); -} - -/* - * 1. read masterblks and locator blocks for all know database locations - * a. keep track of which have good master blks - * b. keep track of which have good locators - * - */ -static int -get_mbs_n_lbs( - mddb_set_t *s, - int *write_lb -) -{ - mddb_lb_t *lbp = NULL; /* pointer to locator block */ - /* May be cast to mddb_mnlb_t */ - /* if accessing sidenames in */ - /* MN set */ - mddb_did_ic_t *did_icp = NULL; /* ptr to Device ID incore */ - mddb_did_blk_t *did_blkp = 0; - int did_blkp_sz = 0; - mddb_did_db_t *did_dbp; - mddb_did_info_t *did_info; - caddr_t did_block; - mddb_ri_t *rip; - mddb_dtag_lst_t *dtlp; - mddb_locator_t *lp; - daddr_t physblk; - int li; - uint_t blk; - md_dev64_t dev; - caddr_t buffer; - uint_t lb_blkcnt; - int retval = 0; - int err = 0; - int lb_ok = 0; - int lb_total = 0; - int lb_tagged = 0; - int lb_tags; - set_t setno = s->s_setno; - int cont_flag, i; - mddb_did_db_t *did_dbp1, *did_dbp2; - int mn_set = 0; - mddb_cfg_loc_t *cl; - - /* - * read in master blocks and locator block for all known locators. - * lb_blkcnt will be set correctly for MN set later once getmasters - * has determined that the set is a MN set. - */ - lb_blkcnt = ((setno == MD_LOCAL_SET) ? MDDB_LOCAL_LBCNT : MDDB_LBCNT); - - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - rip->ri_flags &= (MDDB_F_PTCHED | MDDB_F_IOCTL | - MDDB_F_EMASTER); - rip->ri_lbp = (mddb_lb_t *)NULL; - rip->ri_did_icp = (mddb_did_ic_t *)NULL; - - /* - * Translated dev is only used in calls to getmasters and - * getblks which expect a translated (aka miniroot) dev. - */ - dev = md_xlate_targ_2_mini(rip->ri_dev); - if (dev == NODEV64) { - /* Set error flag that getmasters would have set */ - /* if getmasters had been allowed to fail */ - rip->ri_flags |= MDDB_F_EMASTER; - } - - /* - * Invalid device id on system (due to failed or - * removed device) or invalid devt during upgrade - * (due to powered off device) will cause this - * replica to be marked in error and not used. - */ - if (rip->ri_flags & MDDB_F_EMASTER) - continue; - - /* get all master blocks, does mddb_devopen() */ - rip->ri_mbip = getmasters(s, dev, rip->ri_blkno, - &rip->ri_flags, &mn_set); - - /* if invalid master block - try next replica */ - if (! rip->ri_mbip) - continue; - - /* - * If lbp alloc'd to wrong size - reset it. - * If MN set, lb_blkcnt must be MDDB_MNLBCNT. - * If a traditional set, lb_blkcnt must NOT be MDDB_MNLBCNT. - */ - if (lbp) { - if (((mn_set) && (lb_blkcnt != MDDB_MNLBCNT)) || - ((!mn_set) && (lb_blkcnt == MDDB_MNLBCNT))) { - kmem_free((caddr_t)lbp, dbtob(lb_blkcnt)); - lbp = (mddb_lb_t *)NULL; - } - } - - if (lbp == (mddb_lb_t *)NULL) { - /* If a MN set, set lb_blkcnt for MN loc blk size */ - if (mn_set) - lb_blkcnt = MDDB_MNLBCNT; - lbp = (mddb_lb_t *)kmem_zalloc(dbtob(lb_blkcnt), - KM_SLEEP); - } - - /* - * Read in all the sectors for the locator block - * NOTE: Need to use getblks, rather than readblklst. - * because it is too early and things are - * NOT set up yet for read*()'s - */ - buffer = (caddr_t)lbp; - for (blk = 0; blk < lb_blkcnt; blk++) { - physblk = getphysblk(blk, rip->ri_mbip); - err = getblks(s, buffer, dev, physblk, - btodb(MDDB_BSIZE), 0); - if (err) { - rip->ri_flags |= err; - break; - } - buffer += MDDB_BSIZE; - } - - if (err) - continue; - - /* Verify the locator block */ - if (blk != lb_blkcnt) - continue; - if (lbp->lb_magic != MDDB_MAGIC_LB) - continue; - if (lbp->lb_blkcnt != lb_blkcnt) - continue; - if (mn_set) { - /* If a MN set, check for MNLB revision in lb. */ - if (revchk(MDDB_REV_MNLB, lbp->lb_revision)) - continue; - } else { - /* If not a MN set, check for LB revision in lb. */ - if (revchk(MDDB_REV_LB, lbp->lb_revision)) - continue; - } - if (crcchk(lbp, &lbp->lb_checksum, dbtob(lb_blkcnt), NULL)) - continue; - - /* - * With the addition of MultiNode Disksets, we must make sure - * to verify that this is the correct set. A node could - * have been out of the config for awhile and this disk could - * have been moved to a different diskset and we don't want - * to accidentally start the wrong set. - * - * We don't do this check if we're in the middle of - * importing a set. - */ - if (!(md_get_setstatus(s->s_setno) & - (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) && - (lbp->lb_setno != s->s_setno)) - continue; - - rip->ri_flags |= MDDB_F_LOCACC; - - /* - * a commit count of zero means this locator has been deleted - */ - if (lbp->lb_commitcnt == 0) - continue; - - /* - * If replica is in the device ID style and md_devid_destroy - * flag is set, turn off device id style. This is only to be - * used in a catastrophic failure case. Examples would be - * where the device id of all drives in the system - * (especially the mirror'd root drives) had been changed - * by firmware upgrade or by a patch to an existing disk - * driver. Another example would be in the case of non-unique - * device ids due to a bug. The device id would be valid on - * the system, but would return the wrong dev_t. - */ - if ((lbp->lb_flags & MDDB_DEVID_STYLE) && md_devid_destroy) { - lbp->lb_flags &= ~MDDB_DEVID_STYLE; - lbp->lb_didfirstblk = 0; - lbp->lb_didblkcnt = 0; - *write_lb = 1; - } - - - /* - * If replica is in device ID style, read in device ID - * block and verify device ID block information. - */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - - /* Read in device ID block */ - if (did_icp == NULL) { - did_icp = (mddb_did_ic_t *) - kmem_zalloc(sizeof (mddb_did_ic_t), - KM_SLEEP); - } else { - /* Reuse did_icp, but clear out data */ - if (did_icp->did_ic_blkp != - (mddb_did_blk_t *)NULL) { - kmem_free((caddr_t)did_icp->did_ic_blkp, - did_blkp_sz); - did_blkp = (mddb_did_blk_t *)NULL; - did_icp->did_ic_blkp = - (mddb_did_blk_t *)NULL; - } - if (did_icp->did_ic_dbp != - (mddb_did_db_t *)NULL) { - did_dbp1 = did_icp->did_ic_dbp; - while (did_dbp1) { - did_dbp2 = did_dbp1->db_next; - kmem_free((caddr_t) - did_dbp1->db_ptr, - dbtob(did_dbp1->db_blkcnt)); - kmem_free((caddr_t)did_dbp1, - sizeof (mddb_did_db_t)); - did_dbp1 = did_dbp2; - } - did_icp->did_ic_dbp = - (mddb_did_db_t *)NULL; - } - for (i = 0; i < MDDB_NLB; i++) { - did_icp->did_ic_devid[i] = - (ddi_devid_t)NULL; - } - } - - /* Can't reuse blkp since size could be different */ - if (did_blkp != (mddb_did_blk_t *)NULL) { - kmem_free(did_blkp, did_blkp_sz); - } - did_blkp_sz = (int)dbtob(lbp->lb_didblkcnt); - did_blkp = (mddb_did_blk_t *)kmem_zalloc(did_blkp_sz, - KM_SLEEP); - did_icp->did_ic_blkp = did_blkp; - buffer = (caddr_t)did_blkp; - for (blk = lbp->lb_didfirstblk; - blk < (lbp->lb_didblkcnt + lbp->lb_didfirstblk); - blk++) { - physblk = getphysblk(blk, rip->ri_mbip); - err = getblks(s, buffer, dev, physblk, - btodb(MDDB_BSIZE), 0); - if (err) { - rip->ri_flags |= err; - break; - } - buffer += MDDB_BSIZE; - } - if (err) - continue; - - /* Verify the Device ID block */ - if (blk != (lbp->lb_didblkcnt + lbp->lb_didfirstblk)) - continue; - if (did_blkp->blk_magic != MDDB_MAGIC_DI) - continue; - if (lbp->lb_didblkcnt != MDDB_DID_BLOCKS) - continue; - if (revchk(MDDB_REV_DI, did_blkp->blk_revision)) - continue; - if (crcchk(did_blkp, &did_blkp->blk_checksum, - dbtob(lbp->lb_didblkcnt), NULL)) - continue; - - /* - * Check if device ID block is out of sync with the - * Locator Block by checking if the locator block - * commitcnt does not match the device id block - * commitcnt. If an 'out of sync' condition - * exists, discard this replica since it has - * inconsistent data and can't be used in - * determining the best replica. - * - * An 'out of sync' condition could happen if old - * SDS code was running with new devid style replicas - * or if a failure occurred between the writing of - * the locator block's commitcnt and the device - * id block's commitcnt. - * - * If old SDS code had been running, the upgrade - * process should detect this situation and - * have removed all of the device id information - * via the md_devid_destroy flag in md.conf. - */ - if (did_blkp->blk_commitcnt != - lbp->lb_commitcnt) { - continue; - } - } - - - /* - * If replica is still in device ID style, read in all - * of the device IDs, verify the checksum of the device IDs. - */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - /* - * Reset valid bit in device id info block flags. This - * flag is stored on disk, but the valid bit is reset - * when reading in the replica. If the corresponding - * device id is valid (aka meaning that the system - * knows about this device id), the valid bit will - * be set at a later time. The valid bit for this - * replica's device ID will be set in this routine. - * The valid bits for the rest of the device id's - * will be set after the 'best' replica has - * been selected in routine load_old_replicas. - * Reset updated bit in device id info block flags. - * This flag is also stored on disk, reset when read - * in and set when the locators and side locators - * have been updated to match this valid device - * id information. - */ - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &did_blkp->blk_info[li]; - if (did_info->info_flags & MDDB_DID_EXISTS) - did_info->info_flags &= - ~(MDDB_DID_VALID | - MDDB_DID_UPDATED); - } - - cont_flag = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &did_blkp->blk_info[li]; - did_block = (caddr_t)NULL; - if (did_info->info_flags & MDDB_DID_EXISTS) { - /* - * Check if block has - * already been read in - */ - did_dbp = did_icp->did_ic_dbp; - while (did_dbp != 0) { - if (did_dbp->db_firstblk == - did_info->info_firstblk) - break; - else - did_dbp = - did_dbp->db_next; - } - /* if block not found, read it in */ - if (did_dbp == NULL) { - did_block = (caddr_t) - (kmem_zalloc(dbtob( - did_info->info_blkcnt), - KM_SLEEP)); - buffer = (caddr_t)did_block; - for (blk = - did_info->info_firstblk; - blk < (did_info-> - info_firstblk + - did_info->info_blkcnt); - blk++) { - physblk = - getphysblk(blk, - rip->ri_mbip); - err = getblks(s, - buffer, dev, - physblk, btodb( - MDDB_BSIZE), 0); - if (err) { - rip->ri_flags |= - err; - break; - } - buffer += MDDB_BSIZE; - } - if (err) { - kmem_free(did_block, - dbtob(did_info-> - info_blkcnt)); - did_block = - (caddr_t)NULL; - cont_flag = 1; - break; - } - - /* - * Block read in - - * alloc Disk Block area - */ - did_dbp = (mddb_did_db_t *) - kmem_zalloc( - sizeof (mddb_did_db_t), - KM_SLEEP); - did_dbp->db_ptr = did_block; - did_dbp->db_firstblk = - did_info->info_firstblk; - did_dbp->db_blkcnt = - did_info->info_blkcnt; - - /* Add to front of dbp list */ - did_dbp->db_next = - did_icp->did_ic_dbp; - did_icp->did_ic_dbp = did_dbp; - } - /* Check validity of devid in block */ - if (crcchk(((char *)did_dbp->db_ptr + - did_info->info_offset), - &did_info->info_checksum, - did_info->info_length, NULL)) { - cont_flag = 1; - break; - } - - /* Block now pointed to by did_dbp */ - did_icp->did_ic_devid[li] = - (ddi_devid_t)((char *) - did_dbp->db_ptr + - did_info->info_offset); - } - } - if (cont_flag) - continue; - } - - /* - * All blocks containing devids are now in core. - */ - - /* - * If we're doing a replicated import (also known as - * remote copy import), the device id in the locator - * block is incorrect and we need to fix it up here - * alongwith the l_dev otherwise we run into lots of - * trouble later on. - */ - if ((md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) { - mddb_ri_t *trip; - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &did_blkp->blk_info[li]; - lp = &lbp->lb_locators[li]; - - if (lp->l_flags & MDDB_F_DELETED) - continue; - - if (!(did_info->info_flags & MDDB_DID_EXISTS)) - continue; - - if (did_icp->did_ic_devid[li] == NULL) - continue; - - for (trip = s->s_rip; trip != NULL; - trip = trip->ri_next) { - if (trip->ri_old_devid == NULL) - continue; - if (ddi_devid_compare( - trip->ri_old_devid, - did_icp->did_ic_devid[li]) != 0) { - continue; - } - - /* update l_dev and side mnum */ - lp->l_dev = md_cmpldev(trip->ri_dev); - lbp->lb_sidelocators[0][li].l_mnum = - md_getminor(trip->ri_dev); - } - } - } - - /* - * If there is a valid devid, verify that this locator - * block has information about itself by checking the - * device ID, minor_name and block - * number from this replica's incore data structure - * against the locator block information that has just - * been read in from disk. - * - * If not a valid devid, verify that this locator block - * has information about itself by checking the minor - * number, block number and driver name from this - * replica's incore data structure against the locator - * block information that has just been read in from disk. - */ - if ((rip->ri_devid != NULL) && - (lbp->lb_flags & MDDB_DEVID_STYLE)) { - /* - * This locator block MUST have locator (replica) - * information about itself. Check against devid, - * slice part of minor number, and block number. - */ - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &did_blkp->blk_info[li]; - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - - if (!(did_info->info_flags & MDDB_DID_EXISTS)) - continue; - - if (((md_get_setstatus(setno) & - MD_SET_REPLICATED_IMPORT)) && - (rip->ri_old_devid != (ddi_devid_t)NULL)) { - if (ddi_devid_compare(rip->ri_old_devid, - did_icp->did_ic_devid[li]) != 0) - continue; - } else { - if (ddi_devid_compare(rip->ri_devid, - did_icp->did_ic_devid[li]) != 0) - continue; - } - - if (strcmp(rip->ri_minor_name, - did_info->info_minor_name) != 0) - continue; - - if (lp->l_blkno == rip->ri_blkno) - break; - } - } else { - /* - * This locator block MUST have locator (replica) - * information about itself. - */ - if (!mn_set) { - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_drvnm_t *dn; - mddb_sidelocator_t *slp; - - lp = &lbp->lb_locators[li]; - slp = &lbp-> - lb_sidelocators[s->s_sideno][li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (slp->l_mnum != md_getminor( - rip->ri_dev)) - continue; - if (lp->l_blkno != rip->ri_blkno) - continue; - dn = &lbp->lb_drvnm[slp->l_drvnm_index]; - if (strncmp(dn->dn_data, - rip->ri_driver, MD_MAXDRVNM) == 0) - break; - } - } else { - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_drvnm_t *dn; - mddb_mnsidelocator_t *mnslp; - mddb_mnlb_t *mnlbp; - int i; - - /* - * Check all possible locators locking - * for match to the currently read-in - * locator, must match on: - * - blkno - * - side locator for this - * node's side - * - side locator minor number - * - side locator driver name - */ - - /* - * Looking at sidelocs: - * cast lbp -> mnlbp - */ - mnlbp = (mddb_mnlb_t *)lbp; - lp = &mnlbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (lp->l_blkno != rip->ri_blkno) - continue; - - for (i = 0; i < MD_MNMAXSIDES; i++) { - mnslp = &mnlbp-> - lb_mnsidelocators[i][li]; - if (mnslp->mnl_sideno == - s->s_sideno) { - break; - } - } - /* No matching side found */ - if (i == MD_MNMAXSIDES) - continue; - if (mnslp->mnl_mnum != - md_getminor(rip->ri_dev)) - continue; - dn = &lbp-> - lb_drvnm[mnslp->mnl_drvnm_index]; - if (strncmp(dn->dn_data, - rip->ri_driver, MD_MAXDRVNM) == 0) - break; - } - } - } - - /* - * Didn't find ourself in this locator block it means - * the locator block is a stale transplant. Probably from - * a user doing a dd. - */ - if (li == lbp->lb_loccnt) - continue; - - /* - * Keep track of the number of accessed and valid - * locator blocks. - */ - lb_ok++; - - /* - * Read the tag in, skips invalid or blank tags. - * Only valid tags allocate storage - * Data tags are not used in MN disksets. - */ - if ((!mn_set) && (! dt_read(s, lbp, rip))) { - /* - * Keep track of the number of tagged - * locator blocks. - */ - lb_tagged++; - - /* Keep a list of unique tags. */ - (void) dtl_addl(s, &rip->ri_dtp->dt_dtag); - } - - if (!(md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) { - /* - * go through locator block and add any other - * locations of the data base. - * For the replicated import case, this was done earlier - * and we really don't need or want to do so again - */ - cl = kmem_zalloc(sizeof (mddb_cfg_loc_t), KM_SLEEP); - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - - cl->l_devid_flags = MDDB_DEVID_GETSZ; - cl->l_devid = (uint64_t)0; - cl->l_devid_sz = 0; - cl->l_old_devid = (uint64_t)0; - cl->l_old_devid_sz = 0; - cl->l_minor_name[0] = '\0'; - locator2cfgloc(lbp, cl, li, s->s_sideno, - did_icp); - - if (cl->l_devid_flags & MDDB_DEVID_SZ) { - if ((cl->l_devid = (uintptr_t)kmem_alloc - (cl->l_devid_sz, KM_SLEEP)) - == NULL) { - continue; - } else { - cl->l_devid_flags = - MDDB_DEVID_SPACE; - } - } - locator2cfgloc(lbp, cl, li, s->s_sideno, - did_icp); - - (void) ridev(&s->s_rip, cl, &lp->l_dev, 0); - - if (cl->l_devid_flags & MDDB_DEVID_SPACE) - kmem_free((caddr_t)(uintptr_t) - cl->l_devid, cl->l_devid_sz); - } - kmem_free(cl, sizeof (mddb_cfg_loc_t)); - } - - /* Save LB for later */ - rip->ri_lbp = lbp; - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - rip->ri_did_icp = did_icp; - did_icp = (mddb_did_ic_t *)NULL; - did_blkp = (mddb_did_blk_t *)NULL; - } else - rip->ri_did_icp = NULL; - lbp = (mddb_lb_t *)NULL; - } - - if (lbp != (mddb_lb_t *)NULL) - kmem_free((caddr_t)lbp, dbtob(lb_blkcnt)); - - if (did_icp != (mddb_did_ic_t *)NULL) { - if (did_icp->did_ic_blkp != (mddb_did_blk_t *)NULL) { - kmem_free((caddr_t)did_icp->did_ic_blkp, did_blkp_sz); - did_blkp = (mddb_did_blk_t *)NULL; - } - if (did_icp->did_ic_dbp != (mddb_did_db_t *)NULL) { - mddb_did_db_t *did_dbp1, *did_dbp2; - - did_dbp1 = did_icp->did_ic_dbp; - while (did_dbp1) { - did_dbp2 = did_dbp1->db_next; - kmem_free((caddr_t)did_dbp1->db_ptr, - dbtob(did_dbp1->db_blkcnt)); - kmem_free((caddr_t)did_dbp1, - sizeof (mddb_did_db_t)); - did_dbp1 = did_dbp2; - } - } - kmem_free((caddr_t)did_icp, sizeof (mddb_did_ic_t)); - } - - if (did_blkp != (mddb_did_blk_t *)NULL) { - kmem_free((caddr_t)did_blkp, did_blkp_sz); - } - - /* No locator blocks were ok */ - if (lb_ok == 0) - goto out; - - /* No tagged data was found - will be 0 for MN diskset */ - if (lb_tagged == 0) - goto out; - - /* Find the highest non-deleted replica count */ - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - int lb_tot = 0; - - if (rip->ri_mbip == (mddb_mb_ic_t *)NULL) - continue; - - if (rip->ri_lbp == (mddb_lb_t *)NULL) - continue; - - for (li = 0; li < rip->ri_lbp->lb_loccnt; li++) { - lp = &rip->ri_lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - lb_tot++; - } - - if (lb_tot > lb_total) - lb_total = lb_tot; - } - - /* Count the number of unique tags */ - for (lb_tags = 0, dtlp = s->s_dtlp; dtlp != NULL; dtlp = dtlp->dtl_nx) - lb_tags++; - - /* Should have at least one tag at this point */ - ASSERT(lb_tags > 0); - - - /* - * If the number of tagged locators is not the same as the number of - * OK locators OR more than one tag exists, then make sure the - * selected tag will be written out later. - */ - if ((lb_tagged - lb_ok) != 0 || lb_tags > 1) - md_set_setstatus(setno, MD_SET_TAGDATA); - - /* Only a single tag, take the tagged data */ - if (lb_tags == 1) { - dt_setup(s, &s->s_dtlp->dtl_dt); - md_set_setstatus(setno, MD_SET_USETAG); - goto out; - } - - /* Multiple tags, not selecting a tag, tag mode is on */ - if (! (md_get_setstatus(setno) & MD_SET_USETAG)) - retval = MDDB_E_TAGDATA; - -out: - - return (retval); -} - -/* - * 1. Select a locator. - * 2. check if enough locators now have current copies - * 3. read in database from one of latest - * 4. if known to have latest make all database the same - * 5. if configuration has changed rewrite locators - * - * Parameters: - * s - pointer to mddb_set structure - * flag - used in MN disksets to tell if this node is being joined to - * a diskset that is in the STALE state. If the flag is - * MDDB_MN_STALE, then this node should be marked in the STALE - * state even if > 50% mddbs are available. (The diskset can - * only change from STALE->OK if all nodes withdraw from the - * MN diskset and then rejoin). - */ -static int -load_old_replicas( - mddb_set_t *s, - int flag -) -{ - mddb_lb_t *lbp = NULL; - mddb_mnlb_t *mnlbp = NULL; - mddb_ri_t *rip; - mddb_locator_t *lp; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int li; - int alc; - int lc; - int tlc; - int retval = 0; - caddr_t p; - size_t maxrecsize; - set_t setno = s->s_setno; - mddb_did_db_t *did_dbp1; - mddb_did_info_t *did_info; - mddb_did_ic_t *did_icp = NULL; - md_dev64_t *newdev; - mddb_sidelocator_t *slp = 0; - mddb_mnsidelocator_t *mnslp = 0; - uchar_t i; - char *name; - ddi_devid_t ret_devid; - md_dev64_t dev; - uint_t len, sz; - char *minor_name; - int write_lb = 0; - int rval; - int stale_rtn = 0; - - /* The only error path out of get_mbs_n_lbs() is MDDB_E_TAGDATA */ - if (retval = get_mbs_n_lbs(s, &write_lb)) - goto errout; - - if ((lbp = s->s_lbp = selectlocator(s)) == NULL) { - retval = MDDB_E_NOLOCBLK; - goto errout; - } - - /* If a multi-node set, then set md_set.s_status flag */ - if (lbp->lb_flags & MDDB_MNSET) { - md_set_setstatus(setno, MD_SET_MNSET); - /* - * If data tag area had been allocated before set type was - * known - free it now. - */ - if (md_set[setno].s_dtp) { - kmem_free((caddr_t)md_set[setno].s_dtp, MDDB_DT_BYTES); - md_set[setno].s_dtp = NULL; - } - } - - /* - * If the replica is in devid format, setup the devid incore ptr. - */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (rip->ri_lbp == s->s_lbp) { - did_icp = s->s_did_icp = rip->ri_did_icp; - break; - } - } - /* - * If no devid incore info found - something has gone - * wrong so errout. - */ - if (rip == NULL) { - retval = MDDB_E_NODEVID; - goto errout; - } - - /* - * Add all blocks containing devids to free list. - * Then remove addresses that actually contain devids. - */ - did_dbp1 = did_icp->did_ic_dbp; - while (did_dbp1) { - if (mddb_devid_free_add(s, did_dbp1->db_firstblk, - 0, dbtob(did_dbp1->db_blkcnt))) { - retval = MDDB_E_NOSPACE; - goto errout; - } - - did_dbp1 = did_dbp1->db_next; - } - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &(did_icp->did_ic_blkp->blk_info[li]); - if (!(did_info->info_flags & MDDB_DID_EXISTS)) - continue; - - if (mddb_devid_free_delete(s, did_info->info_firstblk, - did_info->info_offset, did_info->info_length)) { - /* unable to find disk block */ - retval = MDDB_E_NODEVID; - goto errout; - } - } - } - - /* - * create mddb_mbaray, count all locators and active locators. - */ - alc = 0; - lc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - ddi_devid_t li_devid; - - lp = &lbp->lb_locators[li]; - - if (lp->l_flags & MDDB_F_DELETED) - continue; - - /* Count non-deleted replicas */ - lc++; - - /* - * Use the devid of this locator to compare with the rip - * list. The scenario to watch out for here is that this - * locator could be on a disk that is dead and there could - * be a valid entry in the rip list for a different disk - * that has been moved to the dead disks dev_t. We don't - * want to match with the moved disk. - */ - li_devid = NULL; - (void) mddb_devid_get(s, li, &li_devid, &minor_name); - - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (match_mddb(rip, li_devid, minor_name, - md_expldev(lp->l_dev), lp->l_blkno)) { - break; - } - } - if (rip == NULL) { - /* - * If rip not found, then mark error in master block - * so that no writes are later attempted to this - * replica. rip may not be setup if ridev - * failed due to un-found driver name. - */ - lp->l_flags |= MDDB_F_EMASTER; - continue; - } - - s->s_mbiarray[li] = rip->ri_mbip; - - lp->l_flags &= MDDB_F_ACTIVE; - lp->l_flags |= (int)rip->ri_flags; - - if (rip->ri_transplant) - lp->l_flags &= ~MDDB_F_ACTIVE; - - if (lp->l_flags & MDDB_F_LOCACC) - alc++; - } - - /* Save on a divide - calculate 50% + 1 up front */ - tlc = ((lc + 1) / 2); - - if (alc > tlc) { /* alc > tlc - OK */ - md_clr_setstatus(setno, MD_SET_STALE); - } else if (alc < tlc) { /* alc < tlc - stale */ - md_set_setstatus(setno, MD_SET_STALE); - } else if (lc & 1) { /* alc == tlc && odd - OK */ - md_clr_setstatus(setno, MD_SET_STALE); - } else { /* alc == tlc && even - ? */ - /* Can do an accept, and are */ - if (md_get_setstatus(setno) & (MD_SET_ACCOK | MD_SET_ACCEPT)) { - md_clr_setstatus(setno, MD_SET_STALE); - } else { /* possibly has a mediator */ - if (mediate(s)) { - md_set_setstatus(setno, MD_SET_STALE); - } else { - md_clr_setstatus(setno, MD_SET_STALE); - } - } - - /* - * The mirrored_root_flag allows the sysadmin to decide to - * start the local set in a read/write (non-stale) mode - * when there are only 50% available mddbs on the system and - * when the root file system is on a mirror. This is useful - * in a 2 disk system where 1 disk failure would cause an mddb - * quorum failure and subsequent boot failures since the root - * filesystem would be in a read-only state. - */ - if (mirrored_root_flag == 1 && setno == 0 && - svm_bootpath[0] != 0) { - md_clr_setstatus(setno, MD_SET_STALE); - } else { - if (md_get_setstatus(setno) & MD_SET_STALE) { - /* Allow half mode - CAREFUL! */ - if (mddb_allow_half) - md_clr_setstatus(setno, MD_SET_STALE); - } - } - - /* - * In a MN diskset, - * - if 50% mddbs are unavailable and this - * has been marked STALE above - * - master node isn't in the STALE state - * - this node isn't the master node (this node - * isn't the first node to join the set) - * then clear the STALE state and set TOOFEW. - * - * If this node is the master node and set was marked STALE, - * then the set stays STALE. - * - * If this node is not the master and this node's state is - * STALE and the master node is not marked STALE, - * then master node must be in the TOOFEW state or the - * master is panic'ing. A MN diskset can only be placed into - * the STALE state by having the first node join the set - * with <= 50% mddbs. There's no way for a MN diskset to - * transition between STALE and not-STALE states unless all - * nodes are withdrawn from the diskset or all nodes in the - * diskset are rebooted at the same time. - * - * So, mark this node's state as TOOFEW instead of STALE. - */ - if (((md_get_setstatus(setno) & (MD_SET_MNSET | MD_SET_STALE)) - == (MD_SET_MNSET | MD_SET_STALE)) && - ((flag & MDDB_MN_STALE) == 0) && - (!(md_set[setno].s_am_i_master))) { - md_clr_setstatus(setno, MD_SET_STALE); - md_set_setstatus(setno, MD_SET_TOOFEW); - } - } - - /* - * If a MN set is marked STALE on the other nodes, - * mark it stale here. Override all other considerations - * such as a mediator or > 50% mddbs available. - */ - if (md_get_setstatus(setno) & MD_SET_MNSET) { - if (flag & MDDB_MN_STALE) - md_set_setstatus(setno, MD_SET_STALE); - } - - /* - * read a good copy of the locator names - * if an error occurs reading what is suppose - * to be a good copy continue looking for another - * good copy - */ - s->s_lnp = NULL; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - /* Find rip entry for this locator if one exists */ - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (match_mddb(rip, NULL, NULL, md_expldev(lp->l_dev), - lp->l_blkno)) - break; - } - - if (rip == NULL) { - continue; - } - - /* - * Use the rip commitcnt since the commitcnt in lbp could - * been cleared by selectlocator. Looking for a replica with - * the same commitcnt as the 'golden' copy in order to - * get the same data. - */ - if (rip->ri_commitcnt != lbp->lb_commitcnt) { - continue; - } - - /* - * Now have a copy of the database that is equivalent - * to the chosen locator block with respect to - * inittime, identifier and commitcnt. Trying the - * equivalent databases in the order that they were - * written will provide the most up to date data. - */ - lp->l_flags |= readlocnames(s, li); - if (s->s_lnp) - break; - } - - if (s->s_lnp == NULL) { - retval = MDDB_E_NOLOCNMS; - goto errout; - } - - /* - * read a good copy of the data base - * if an error occurs reading what is suppose - * to be a good copy continue looking for another - * good copy - */ - - s->s_dbp = NULL; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - /* Find rip entry for this locator if one exists */ - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (match_mddb(rip, NULL, NULL, md_expldev(lp->l_dev), - lp->l_blkno)) - break; - } - - if (rip == NULL) { - continue; - } - - /* - * Use the rip commitcnt since the commitcnt in lbp could - * been cleared by selectlocator. Looking for a replica with - * the same commitcnt as the 'golden' copy in order to - * get the same data. - */ - if (rip->ri_commitcnt != lbp->lb_commitcnt) { - continue; - } - - /* - * Now have a copy of the database that is equivalent - * to the chosen locator block with respect to - * inittime, identifier and commitcnt. Trying the - * equivalent databases in the order that they were - * written will provide the most up to date data. - */ - lp->l_flags |= readcopy(s, li); - - if (s->s_dbp) - break; - } - - if (s->s_dbp == NULL) { - retval = MDDB_E_NODIRBLK; - goto errout; - } - - lp->l_flags |= MDDB_F_MASTER; - lp->l_flags |= MDDB_F_UP2DATE; - - /* - * go through and find largest record; - * Also fixup the user data area's - */ - maxrecsize = MAX(MDDB_BSIZE, s->s_databuffer_size); - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) - for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) - if (dep->de_flags & MDDB_F_OPT) - getoptrecord(s, dep); - else { - allocuserdata(dep); - maxrecsize = MAX(dep->de_recsize, maxrecsize); - } - - if (maxrecsize > s->s_databuffer_size) { - p = (caddr_t)kmem_zalloc(maxrecsize, KM_SLEEP); - if (s->s_databuffer_size) - kmem_free(s->s_databuffer, s->s_databuffer_size); - s->s_databuffer = p; - s->s_databuffer_size = maxrecsize; - } - - /* If we can clear the tag data record, do it now. */ - /* Data tags not supported on MN sets */ - if ((md_get_setstatus(setno) & MD_SET_CLRTAG) && - (!(md_get_setstatus(setno) & MD_SET_MNSET))) - dt_setup(s, NULL); - - /* This will return non-zero if STALE or TOOFEW */ - /* This will write out chosen replica image to all replicas */ - stale_rtn = selectreplicas(s, MDDB_SCANALL); - - if ((md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) { - ddi_devid_t devidptr; - - /* - * ignore the return value from selectreplicas because we - * may have a STALE or TOOFEW set in the case of a partial - * replicated diskset. We will fix that up later. - */ - - lbp = s->s_lbp; - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &(did_icp->did_ic_blkp->blk_info[li]); - - if (did_info->info_flags & MDDB_DID_EXISTS) { - devidptr = s->s_did_icp->did_ic_devid[li]; - lp = &lbp->lb_locators[li]; - for (rip = s->s_rip; rip != NULL; - rip = rip->ri_next) { - if (rip->ri_old_devid == 0) - continue; - if (ddi_devid_compare(rip->ri_old_devid, - devidptr) != 0) { - continue; - } - if (update_locatorblock(s, - md_expldev(lp->l_dev), - rip->ri_devid, rip->ri_old_devid)) { - goto errout; - } - } - } - } - } else { - if (stale_rtn) - goto errout; - } - - /* - * If the replica is in device id style - validate the device id's, - * if present, in the locator block devid area. - */ - newdev = kmem_zalloc(sizeof (md_dev64_t) * MDDB_NLB, KM_SLEEP); - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - for (li = 0; li < lbp->lb_loccnt; li++) { - newdev[li] = 0; - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - did_info = &(did_icp->did_ic_blkp->blk_info[li]); - dev = md_expldev(lp->l_dev); - if (did_info->info_flags & MDDB_DID_EXISTS) { - /* Validate device id on current system */ - newdev[li] = dev; - if (mddb_devid_validate( - did_icp->did_ic_devid[li], - &(newdev[li]), - did_info->info_minor_name) == 0) { - /* Set valid flag */ - did_info->info_flags |= MDDB_DID_VALID; - } else { - lp->l_flags |= MDDB_F_EMASTER; - } - } else if (!(MD_UPGRADE)) { - /* - * If a device doesn't have a device id, - * check if there is now a device ID - * associated with device. If one exists, - * add it to the locator block devid area. - * If there's not enough space to add it, - * print a warning. - * Don't do this during upgrade. - */ - dev_t ddi_dev = md_dev64_to_dev(dev); - if (ddi_lyr_get_devid(ddi_dev, &ret_devid) == - DDI_SUCCESS) { - if (ddi_lyr_get_minor_name(ddi_dev, - S_IFBLK, &minor_name) - == DDI_SUCCESS) { - if (mddb_devid_add(s, li, - ret_devid, minor_name)) { - cmn_err(CE_WARN, - "Not enough space" - " in metadevice" - " state" - " database\n"); - cmn_err(CE_WARN, - "to add relocation" - " information for" - " device:\n"); - cmn_err(CE_WARN, - " major = %d, " - " minor = %d\n", - getmajor(ddi_dev), - getminor(ddi_dev)); - } else { - write_lb = 1; - } - kmem_free(minor_name, - strlen(minor_name) + 1); - } - ddi_devid_free(ret_devid); - } - } - } - - /* - * If a device has a valid device id and if the dev_t - * associated with the device id has changed, update the - * driver name, minor num and dev_t in the local and side - * locators to match the dev_t that the system currently - * associates with the device id. - * - * Don't do this during upgrade. - */ - if (!(MD_UPGRADE)) { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - did_info = &(did_icp->did_ic_blkp->blk_info - [li]); - if ((did_info->info_flags & MDDB_DID_VALID) && - !(did_info->info_flags & - MDDB_DID_UPDATED)) { - if (lbp->lb_flags & MDDB_MNSET) { - int j; - int index = -1; - mnlbp = (mddb_mnlb_t *)lbp; - for (j = 0; j < MD_MNMAXSIDES; - j++) { - mnslp = &mnlbp-> - lb_mnsidelocators[j] - [li]; - if (mnslp->mnl_sideno == - s->s_sideno) - break; - if (mnslp->mnl_sideno == - 0) - index = j; - } - if (j == MD_MNMAXSIDES) { - /* - * No match found; take - * empty - */ - mnslp = &mnlbp-> - lb_mnsidelocators - [index][li]; - write_lb = 1; - mnslp->mnl_mnum = - md_getminor(newdev - [li]); - } else if (mnslp->mnl_mnum != - md_getminor(newdev[li])) { - write_lb = 1; - mnslp->mnl_mnum = - md_getminor(newdev - [li]); - } - } else { - slp = &lbp-> - lb_sidelocators[s->s_sideno] - [li]; - if (slp->l_mnum != - md_getminor(newdev[li])) { - write_lb = 1; - slp->l_mnum = - md_getminor(newdev - [li]); - } - } - name = ddi_major_to_name(md_getmajor( - newdev[li])); - if (lbp->lb_flags & MDDB_MNSET) - i = mnslp->mnl_drvnm_index; - else - i = slp->l_drvnm_index; - if (strncmp(lbp->lb_drvnm[i].dn_data, - name, lbp->lb_drvnm[i].dn_len) != - 0) { - /* Driver name has changed */ - len = strlen(name); - /* Look for the driver name */ - for (i = 0; i < MDDB_DRVNMCNT; - i++) { - if (lbp->lb_drvnm[i]. - dn_len != len) - continue; - if (strncmp(lbp-> - lb_drvnm[i].dn_data, - name, len) == 0) - break; - } - /* Didn't find one, add it */ - if (i == MDDB_DRVNMCNT) { - for (i = 0; i < - MDDB_DRVNMCNT; - i++) { - if (lbp-> - lb_drvnm[i]. - dn_len == 0) - break; - } - if (i == - MDDB_DRVNMCNT) { - cmn_err(CE_WARN, - "Unable to " - " update " - "driver " - " name for " - "dev: " - "major = %d" - ", minor = " - "%d\n", - md_getmajor( - newdev[li]), - md_getminor( - newdev - [li])); - continue; - } - (void) strncpy(lbp-> - lb_drvnm[i].dn_data, - name, MD_MAXDRVNM); - lbp->lb_drvnm[i]. - dn_len = (uchar_t) - strlen(name); - } - /* Fill in the drvnm index */ - if (lbp->lb_flags & - MDDB_MNSET) - mnslp->mnl_drvnm_index = - i; - else - slp->l_drvnm_index = i; - write_lb = 1; - } - did_info->info_flags |= - MDDB_DID_UPDATED; - } - } - } - } - kmem_free(newdev, sizeof (md_dev64_t) * MDDB_NLB); - - /* - * If locator block has been changed by get_mbs_n_lbs, - * by addition of new device id, by updated minor name or - * by updated driver name - write out locator block. - */ - if (write_lb) { - rval = push_lb(s); - (void) upd_med(s, "load_old_replicas(0)"); - if (rval) - goto errout; - } - - /* - * If the tag was moved, allocated, or a BADTAG was seen for some other - * reason, then make sure tags are written to all the replicas. - * Data tags not supported on MN sets. - */ - if (!(md_get_setstatus(setno) & MD_SET_MNSET)) { - if (! (lc = dt_alloc_if_needed(s))) { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - if (lp->l_flags & MDDB_F_BADTAG) { - lc = 1; - break; - } - } - } - - if (lc) { - md_set_setstatus(setno, MD_SET_TAGDATA); - md_clr_setstatus(setno, MD_SET_BADTAG); - (void) selectreplicas(s, MDDB_SCANALL); - } - } - -errout: - - /* Free extraneous rip components. */ - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - /* Get rid of lbp's and dtp's */ - - if (rip->ri_lbp != lbp) { - if (rip->ri_dtp != (mddb_dt_t *)NULL) { - kmem_free((caddr_t)rip->ri_dtp, MDDB_DT_BYTES); - rip->ri_dtp = (mddb_dt_t *)NULL; - } - - if (rip->ri_devid != (ddi_devid_t)NULL) { - sz = (int)ddi_devid_sizeof(rip->ri_devid); - kmem_free((caddr_t)rip->ri_devid, sz); - rip->ri_devid = (ddi_devid_t)NULL; - } - if (rip->ri_old_devid != (ddi_devid_t)NULL) { - sz = (int)ddi_devid_sizeof(rip->ri_old_devid); - kmem_free((caddr_t)rip->ri_old_devid, sz); - rip->ri_old_devid = (ddi_devid_t)NULL; - } - - if (rip->ri_lbp != (mddb_lb_t *)NULL) { - mddb_devid_icp_free(&rip->ri_did_icp, - rip->ri_lbp); - - kmem_free((caddr_t)rip->ri_lbp, - dbtob(rip->ri_lbp->lb_blkcnt)); - rip->ri_lbp = (mddb_lb_t *)NULL; - } - } - - if (lbp != NULL) { - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (rip->ri_dev == md_expldev(lp->l_dev) && - rip->ri_blkno == lp->l_blkno) - break; - } - if (li < lbp->lb_loccnt) - continue; - } - - /* - * Get rid of mbp's: - * if lbp, those out of lb_loccnt bounds - * if !lbp, all of them. - */ - if (rip->ri_mbip) { - md_dev64_t dev64 = md_xlate_targ_2_mini(rip->ri_dev); - if (dev64 != NODEV64) - mddb_devclose(dev64); - - free_mbipp(&rip->ri_mbip); - } - /* - * Turn off MDDB_F_EMASTER flag in a diskset since diskset - * code always ends up calling ridev for all replicas - * before calling load_old_replicas. ridev will reset - * MDDB_F_EMASTER flag if flag was due to unresolved devid. - */ - if (setno != MD_LOCAL_SET) - rip->ri_flags &= ~MDDB_F_EMASTER; - } - return (retval); -} - -/* - * Given the devt from the md.conf info, get the devid for the device. - */ -static void -lookup_db_devid(mddb_cfg_loc_t *cl) -{ - dev_t ldev; - ddi_devid_t devid; - char *minor; - - if (ddi_name_to_major(cl->l_driver) == (major_t)-1) { - cmn_err(CE_NOTE, "mddb: unknown major name '%s'", cl->l_driver); - return; - } - - ldev = makedevice(ddi_name_to_major(cl->l_driver), cl->l_mnum); - if (ddi_lyr_get_devid(ldev, &devid) != DDI_SUCCESS) { - cmn_err(CE_NOTE, "mddb: unable to get devid for '%s', 0x%x", - cl->l_driver, cl->l_mnum); - return; - } - - if (ddi_lyr_get_minor_name(ldev, S_IFBLK, &minor) != DDI_SUCCESS) { - cmn_err(CE_NOTE, "mddb: unable to get minor name 0x%x", - cl->l_mnum); - return; - } - - cl->l_devid_flags = MDDB_DEVID_SPACE | MDDB_DEVID_VALID | MDDB_DEVID_SZ; - cl->l_devid_sz = (int)ddi_devid_sizeof(devid); - cl->l_devid = (uint64_t)(uintptr_t)devid; - (void) strlcpy(cl->l_minor_name, minor, MDDB_MINOR_NAME_MAX); - - kmem_free(minor, strlen(minor) + 1); -} - -/* - * grab driver name, minor, block and devid out of - * strings like "driver:minor:block:devid" - */ -static int -parse_db_loc( - char *str, - mddb_cfg_loc_t *clp -) -{ - char *p, *e; - char *minor_name; - ddi_devid_t ret_devid; - - clp->l_dev = 0; - p = clp->l_driver; - e = p + sizeof (clp->l_driver) - 1; - while ((*str != ':') && (*str != '\0') && (p < e)) - *p++ = *str++; - *p = '\0'; - if (*str++ != ':') - return (-1); - clp->l_mnum = 0; - while (ISNUM(*str)) { - clp->l_mnum *= 10; - clp->l_mnum += *str++ - '0'; - } - if (*str++ != ':') - return (-1); - clp->l_blkno = 0; - while (ISNUM(*str)) { - clp->l_blkno *= 10; - clp->l_blkno += *str++ - '0'; - } - if (*str++ != ':') - return (-1); - - /* - * If the md_devid_destroy flag is set, ignore the device ids. - * This is only to used in a catastrophic failure case. Examples - * would be where the device id of all drives in the system - * (especially the mirror'd root drives) had been changed - * by firmware upgrade or by a patch to an existing disk - * driver. Another example would be in the case of non-unique - * device ids due to a bug. The device id would be valid on - * the system, but would return the wrong dev_t. - */ - if (md_devid_destroy) { - clp->l_devid_flags = 0; - clp->l_devid = (uint64_t)NULL; - clp->l_devid_sz = 0; - clp->l_old_devid = (uint64_t)NULL; - clp->l_old_devid_sz = 0; - clp->l_minor_name[0] = '\0'; - return (0); - } - - if (ddi_devid_str_decode(str, - (ddi_devid_t *)&ret_devid, &minor_name) == DDI_FAILURE) - return (-1); - - clp->l_devid = (uint64_t)(uintptr_t)ret_devid; - clp->l_devid_flags = 0; - clp->l_old_devid = (uint64_t)NULL; - clp->l_old_devid_sz = 0; - - /* If no device id associated with device, just return */ - if ((ddi_devid_t)(uintptr_t)clp->l_devid == (ddi_devid_t)NULL) { - clp->l_devid_sz = 0; - clp->l_minor_name[0] = '\0'; - if (strcmp(str, "id0") == 0 && md_devid_destroy == 0 && - md_keep_repl_state == 0) { - /* - * No devid in md.conf; we're in recovery mode so - * lookup the devid for the device as specified by - * the devt in md.conf. - */ - lookup_db_devid(clp); - } - return (0); - } - - clp->l_devid_flags = MDDB_DEVID_SPACE | MDDB_DEVID_VALID | - MDDB_DEVID_SZ; - clp->l_devid_sz = (int)ddi_devid_sizeof( - (ddi_devid_t)(uintptr_t)clp->l_devid); - (void) strcpy(clp->l_minor_name, minor_name); - kmem_free(minor_name, strlen(minor_name) + 1); - - return (0); -} - -/* - * grab driver name, minor, and block out of - * strings like "driver:minor:block:devid driver:minor:block:devid ..." - */ -static void -parse_db_string( - char *str -) -{ - char *p, *e; - mddb_cfg_loc_t *cl; - char restore_space; - - /* CSTYLED */ - cl = kmem_zalloc(sizeof (mddb_cfg_loc_t), KM_SLEEP); - for (p = str; (*p != '\0'); ) { - for (; ((*p != '\0') && (ISWHITE(*p))); ++p) - ; - if (*p == '\0') - break; - for (e = p; ((*e != '\0') && (! ISWHITE(*e))); ++e) - ; - /* - * Only give parse_db_loc 1 entry, so stuff a null into - * the string if we're not at the end. We need to save this - * char and restore it after call. - */ - restore_space = '\0'; - if (*e != '\0') { - restore_space = *e; - *e = '\0'; - } - if (parse_db_loc(p, cl) != 0) { - cmn_err(CE_NOTE, "mddb: parsing error on '%s'", p); - } else { - (void) ridev( - &((mddb_set_t *)md_set[MD_LOCAL_SET].s_db)->s_rip, - cl, NULL, MDDB_F_PTCHED); - if (cl->l_devid_flags & MDDB_DEVID_SPACE) { - kmem_free((caddr_t)(uintptr_t)cl->l_devid, - cl->l_devid_sz); - } - } - if (restore_space != '\0') { - *e = restore_space; - } - p = e; - } - kmem_free(cl, sizeof (mddb_cfg_loc_t)); -} - -/* - * grab database locations supplied by md.conf as properties - */ -static void -parse_db_strings(void) -{ - int bootlist_id; - int proplen; - /* - * size of _bootlist_name should match uses of line and entry in - * libmeta meta_systemfile_append_mddb routine (meta_systemfile.c) - */ - char _bootlist_name[MDDB_BOOTLIST_MAX_LEN]; - char *bootlist_name; - caddr_t prop; - -/* - * Step through the bootlist properties one at a time by forming the - * correct name, fetching the property, parsing the property and - * then freeing the memory. If a property does not exist or returns - * some form of error just ignore it. There is no guarantee that - * the properties will always exist in sequence, for example - * mddb_bootlist1 may exist and mddb_bootlist2 may not exist with - * mddb_bootlist3 existing. - */ - bootlist_name = &_bootlist_name[0]; - for (bootlist_id = 0; bootlist_id < md_maxbootlist; bootlist_id++) { - - proplen = 0; - (void) sprintf(bootlist_name, "mddb_bootlist%d", bootlist_id); - - if (ddi_getlongprop(DDI_DEV_T_ANY, md_devinfo, - DDI_PROP_CANSLEEP, bootlist_name, (caddr_t)&prop, - &proplen) != DDI_PROP_SUCCESS) - continue; - - if (proplen <= 0) - continue; - - if (md_init_debug) - cmn_err(CE_NOTE, "%s is %s", bootlist_name, prop); - - parse_db_string(prop); - kmem_free(prop, proplen); - } -} - -static int -initit( - set_t setno, - int flag -) -{ - int i; - mddb_set_t *s; - mddb_lb_t *lbp; /* pointer to locator block */ - mddb_ln_t *lnp; /* pointer to locator names */ - mddb_db_t *dbp; /* pointer to directory block */ - mddb_did_blk_t *did_blkp; /* pointer to Device ID block */ - mddb_did_ic_t *did_icp; /* pointer to Device ID incore area */ - mddb_bf_t *bfp; - side_t sideno; - side_t maxsides; - mddb_block_t lb_blkcnt; - int retval = 0; - md_dev64_t dev; - mddb_mnlb_t *mnlbp; - int devid_flag; - - /* single thread's all loads/unloads of set's */ - mutex_enter(&mddb_lock); - mutex_enter(SETMUTEX(setno)); - - if (((mddb_set_t *)md_set[setno].s_db) == NULL) { - mutex_exit(SETMUTEX(setno)); - mutex_exit(&mddb_lock); - return (MDDB_E_NOTNOW); - } - - s = (mddb_set_t *)md_set[setno].s_db; - - single_thread_start(s); - - /* - * init is already underway, block. Return success. - */ - if (s->s_lbp) { - single_thread_end(s); - mutex_exit(SETMUTEX(setno)); - mutex_exit(&mddb_lock); - return (0); - } - - uniqtime32(&s->s_inittime); - - /* grab database locations patched by /etc/system */ - if (setno == MD_LOCAL_SET) - parse_db_strings(); - - s->s_mbiarray = (mddb_mb_ic_t **)kmem_zalloc( - sizeof (mddb_mb_ic_t *) * mddb_maxcopies, KM_SLEEP); - - s->s_zombie = 0; - s->s_staledeletes = 0; - s->s_optcmtcnt = 0; - s->s_opthavelck = 0; - s->s_optwantlck = 0; - s->s_optwaiterr = 0; - s->s_opthungerr = 0; - - /* - * KEEPTAG can never be set for a MN diskset since no tags are - * allowed to be stored in a MN diskset. No way to check - * if this is a MN diskset or not at this point since the mddb - * hasn't been read in from disk yet. (flag will only have - * MUTLINODE bit set if a new set is being created.) - */ - if (! (md_get_setstatus(s->s_setno) & MD_SET_KEEPTAG)) - dt_setup(s, NULL); - - md_clr_setstatus(s->s_setno, MD_SET_TOOFEW); - - for (i = 0; i < mddb_maxbufheaders; i++) { - bfp = (mddb_bf_t *)kmem_zalloc(sizeof (*bfp), KM_SLEEP); - sema_init(&bfp->bf_buf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bfp->bf_buf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - bfp->bf_buf.b_offset = -1; - freebuffer(s, bfp); - } - - retval = load_old_replicas(s, flag); - /* If 0 return value - success */ - if (! retval) { - single_thread_end(s); - mutex_exit(SETMUTEX(setno)); - mutex_exit(&mddb_lock); - return (0); - } - - /* - * If here, then the load_old_replicas() failed - */ - - - /* If the database was supposed to exist. */ - if (flag & MDDB_MUSTEXIST) { - if (s->s_mbiarray != (mddb_mb_ic_t **)NULL) { - for (i = 0; i < mddb_maxcopies; i++) { - if (! s->s_mbiarray[i]) - continue; - dev = md_expldev( - s->s_lbp->lb_locators[i].l_dev); - dev = md_xlate_targ_2_mini(dev); - if (dev != NODEV64) - mddb_devclose(dev); - - free_mbipp(&s->s_mbiarray[i]); - } - - kmem_free((caddr_t)s->s_mbiarray, - sizeof (mddb_mb_ic_t *) * mddb_maxcopies); - s->s_mbiarray = NULL; - } - - if (s->s_lnp != (mddb_ln_t *)NULL) { - kmem_free((caddr_t)s->s_lnp, - dbtob(s->s_lbp->lb_lnblkcnt)); - s->s_lnp = (mddb_ln_t *)NULL; - } - - mddb_devid_icp_free(&s->s_did_icp, s->s_lbp); - - if (s->s_lbp != (mddb_lb_t *)NULL) { - kmem_free((caddr_t)s->s_lbp, - dbtob(s->s_lbp->lb_blkcnt)); - s->s_lbp = (mddb_lb_t *)NULL; - } - - while ((bfp = allocbuffer(s, MDDB_NOSLEEP)) != NULL) - kmem_free((caddr_t)bfp, sizeof (*bfp)); - - single_thread_end(s); - mutex_exit(SETMUTEX(setno)); - mutex_exit(&mddb_lock); - - if (retval == MDDB_E_TAGDATA) - return (retval); - - /* Want a bit more detailed error messages */ - if (mddb_db_err_detail) - return (retval); - - return (MDDB_E_NODB); - } - - - /* - * MDDB_NOOLDOK set - Creating a new database, so do - * more initialization. - */ - - lb_blkcnt = (mddb_block_t)((setno == MD_LOCAL_SET) ? - MDDB_LOCAL_LBCNT : MDDB_LBCNT); - if (flag & MDDB_MULTINODE) { - lb_blkcnt = MDDB_MNLBCNT; - } - - if (s->s_lbp == NULL) - s->s_lbp = (mddb_lb_t *)kmem_alloc(dbtob(lb_blkcnt), KM_SLEEP); - lbp = s->s_lbp; - - bzero((caddr_t)lbp, dbtob(lb_blkcnt)); - lbp->lb_setno = setno; - lbp->lb_magic = MDDB_MAGIC_LB; - if (flag & MDDB_MULTINODE) { - lbp->lb_revision = MDDB_REV_MNLB; - } else { - lbp->lb_revision = MDDB_REV_LB; - } - lbp->lb_inittime = s->s_inittime; - if (flag & MDDB_MULTINODE) { - mnlbp = (mddb_mnlb_t *)lbp; - for (i = 0; i < MDDB_NLB; i++) { - for (sideno = 0; sideno < MD_MNMAXSIDES; sideno++) { - mddb_mnsidelocator_t *mnslp; - mnslp = &mnlbp->lb_mnsidelocators[sideno][i]; - mnslp->mnl_mnum = NODEV32; - mnslp->mnl_sideno = 0; - mnslp->mnl_drvnm_index = 0; - } - } - } else { - maxsides = ((setno == MD_LOCAL_SET) ? 1 : MD_MAXSIDES); - for (i = 0; i < MDDB_NLB; i++) { - for (sideno = 0; sideno < maxsides; sideno++) { - mddb_sidelocator_t *slp; - slp = &lbp->lb_sidelocators[sideno][i]; - slp->l_mnum = NODEV32; - } - } - } - lbp->lb_blkcnt = lb_blkcnt; - - /* lb starts on block 0 */ - /* locator names starts after locator block */ - lbp->lb_lnfirstblk = lb_blkcnt; - if (flag & MDDB_MULTINODE) { - lbp->lb_lnblkcnt = (mddb_block_t)MDDB_MNLNCNT; - } else { - lbp->lb_lnblkcnt = (mddb_block_t)((setno == MD_LOCAL_SET) ? - MDDB_LOCAL_LNCNT : MDDB_LNCNT); - } - - if (flag & MDDB_MULTINODE) { - /* Creating a multinode diskset */ - md_set_setstatus(setno, MD_SET_MNSET); - lbp->lb_flags |= MDDB_MNSET; - } - - /* Data portion of mddb located after locator names */ - lbp->lb_dbfirstblk = lbp->lb_lnfirstblk + lbp->lb_lnblkcnt; - - /* the btodb that follows is converting the directory block size */ - /* Data tag part of mddb located after first block of mddb data */ - lbp->lb_dtfirstblk = (mddb_block_t)(lbp->lb_dbfirstblk + - btodb(MDDB_BSIZE)); - /* Data tags are not used in MN diskset - so set count to 0 */ - if (flag & MDDB_MULTINODE) - lbp->lb_dtblkcnt = (mddb_block_t)0; - else - lbp->lb_dtblkcnt = (mddb_block_t)MDDB_DT_BLOCKS; - - - lnp = (mddb_ln_t *)kmem_zalloc(dbtob(lbp->lb_lnblkcnt), KM_SLEEP); - lnp->ln_magic = MDDB_MAGIC_LN; - if (flag & MDDB_MULTINODE) { - lnp->ln_revision = MDDB_REV_MNLN; - } else { - lnp->ln_revision = MDDB_REV_LN; - } - s->s_lnp = lnp; - - /* - * Set up Device ID portion of Locator Block. - * Do not set locator to device id style if - * md_devid_destroy is 1 and md_keep_repl_state is 1 - * (destroy all device id data and keep replica in - * non device id mode). - * - * This is logically equivalent to set locator to - * device id style if md_devid_destroy is 0 or - * md_keep_repl_state is 0. - * - * In SunCluster environment, device id mode is disabled - * which means diskset will be run in non-devid mode. For - * localset, the behavior will remain intact and run in - * device id mode. - * - * In multinode diskset devids are turned off. - */ - devid_flag = 1; - if (cluster_bootflags & CLUSTER_CONFIGURED) - if (setno != MD_LOCAL_SET) - devid_flag = 0; - if (flag & MDDB_MULTINODE) - devid_flag = 0; - if ((md_devid_destroy == 1) && (md_keep_repl_state == 1)) - devid_flag = 0; - /* - * if we weren't devid style before and md_keep_repl_state=1 - * we need to stay non-devid - */ - if (((lbp->lb_flags & MDDB_DEVID_STYLE) == 0) && - (md_keep_repl_state == 1)) - devid_flag = 0; - if (devid_flag) { - lbp->lb_didfirstblk = lbp->lb_dtfirstblk + - lbp->lb_dtblkcnt; - lbp->lb_didblkcnt = (mddb_block_t)MDDB_DID_BLOCKS; - lbp->lb_flags |= MDDB_DEVID_STYLE; - - did_icp = (mddb_did_ic_t *)kmem_zalloc - (sizeof (mddb_did_ic_t), KM_SLEEP); - did_blkp = (mddb_did_blk_t *) - kmem_zalloc(dbtob(lbp->lb_didblkcnt), KM_SLEEP); - did_blkp->blk_magic = MDDB_MAGIC_DI; - did_blkp->blk_revision = MDDB_REV_DI; - did_icp->did_ic_blkp = did_blkp; - s->s_did_icp = did_icp; - } - - setidentifier(s, &lbp->lb_ident); - uniqtime32(&lbp->lb_timestamp); - dbp = (mddb_db_t *)kmem_zalloc(sizeof (mddb_db_t), KM_SLEEP); - dbp->db_magic = MDDB_MAGIC_DB; - dbp->db_revision = MDDB_REV_DB; - uniqtime32(&dbp->db_timestamp); - dbp->db_nextblk = 0; - dbp->db_firstentry = NULL; - dbp->db_blknum = lbp->lb_dbfirstblk; - dbp->db_recsum = MDDB_GLOBAL_XOR; - s->s_dbp = dbp; - single_thread_end(s); - mutex_exit(SETMUTEX(setno)); - mutex_exit(&mddb_lock); - return (0); -} - -mddb_set_t * -mddb_setenter( - set_t setno, - int flag, - int *errorcodep -) -{ - mddb_set_t *s; - int err = 0; - size_t sz = sizeof (void *) * MD_MAXUNITS; - - mutex_enter(SETMUTEX(setno)); - if (! md_set[setno].s_db) { - mutex_exit(SETMUTEX(setno)); - if (errorcodep != NULL) - *errorcodep = MDDB_E_NOTOWNER; - return (NULL); - } - - /* Allocate s_un and s_ui arrays if not already present. */ - if (md_set[setno].s_un == NULL) { - md_set[setno].s_un = kmem_zalloc(sz, KM_NOSLEEP); - if (md_set[setno].s_un == NULL) { - mutex_exit(SETMUTEX(setno)); - if (errorcodep != NULL) - *errorcodep = MDDB_E_NOTOWNER; - return (NULL); - } - } - if (md_set[setno].s_ui == NULL) { - md_set[setno].s_ui = kmem_zalloc(sz, KM_NOSLEEP); - if (md_set[setno].s_ui == NULL) { - mutex_exit(&md_set[setno].s_dbmx); - kmem_free(md_set[setno].s_un, sz); - md_set[setno].s_un = NULL; - if (errorcodep != NULL) - *errorcodep = MDDB_E_NOTOWNER; - return (NULL); - } - } - s = (mddb_set_t *)md_set[setno].s_db; - if (s->s_lbp) - return (s); - - if (flag & MDDB_NOINIT) - return (s); - - /* - * Release the set mutex - it will be acquired and released in - * initit after acquiring the mddb_lock. This is done to assure - * that mutexes are always acquired in the same order to prevent - * possible deadlock - */ - mutex_exit(SETMUTEX(setno)); - - if ((err = initit(setno, flag)) != 0) { - if (errorcodep != NULL) - *errorcodep = err; - return (NULL); - } - - mutex_enter(SETMUTEX(setno)); - return ((mddb_set_t *)md_set[setno].s_db); -} - -/* - * Release the set lock for a given set. - * - * In a MN diskset, this routine may send messages to the rpc.mdcommd - * in order to have the slave nodes re-parse parts of the mddb. - * Messages are only sent if the global ioctl lock is not held. - * - * With the introduction of multi-threaded ioctls, there is no way - * to determine which thread(s) are holding the ioctl lock. So, if - * the ioctl lock is held (by process X) process X will send the - * messages to the slave nodes when process X releases the ioctl lock. - */ -void -mddb_setexit( - mddb_set_t *s -) -{ - md_mn_msg_mddb_parse_t *mddb_parse_msg; - md_mn_kresult_t *kresult; - mddb_lb_t *lbp = s->s_lbp; - int i; - int rval = 1; - - /* - * If not a MN diskset OR - * a MN diskset but this node isn't master, - * then release the mutex. - */ - if (!(MD_MNSET_SETNO(s->s_setno)) || - ((MD_MNSET_SETNO(s->s_setno)) && - (!md_set[s->s_setno].s_am_i_master))) { - mutex_exit(SETMUTEX(s->s_setno)); - return; - } - - /* - * If global ioctl lock is held, then send no messages, - * just release mutex and return. - * - */ - if (md_status & MD_GBL_IOCTL_LOCK) { - mutex_exit(SETMUTEX(s->s_setno)); - return; - } - - /* - * This thread is not holding the ioctl lock, so drop the set - * lock, send messages to slave nodes to reparse portions - * of the mddb and return. - * - * If the block parse flag is set, do not send parse messages. - * This flag is set when master is adding a new mddb that would - * cause parse messages to be sent to the slaves, but the slaves - * don't have knowledge of the new mddb yet since the mddb add - * operation hasn't been run on the slave nodes yet. When the - * master unblocks the parse flag, the parse messages will be - * generated. - * - * If s_mn_parseflags_sending is non-zero, then another thread - * is already currently sending a parse message, so just release - * the mutex and return. If an mddb change occurred that results - * in a parse message to be generated, the thread that is currently - * sending a parse message would generate the additional parse message. - * - * If s_mn_parseflags_sending is zero and parsing is not blocked, - * then loop until s_mn_parseflags is 0 (until there are no more - * messages to send). - * While s_mn_parseflags is non-zero, - * put snapshot of parse_flags in s_mn_parseflags_sending - * set s_mn_parseflags to zero - * release mutex - * send message - * re-grab mutex - * set s_mn_parseflags_sending to zero - */ - mddb_parse_msg = kmem_zalloc(sizeof (md_mn_msg_mddb_parse_t), KM_SLEEP); - while (((s->s_mn_parseflags_sending & MDDB_PARSE_MASK) == 0) && - (s->s_mn_parseflags & MDDB_PARSE_MASK) && - (!(md_get_setstatus(s->s_setno) & MD_SET_MNPARSE_BLK))) { - /* Grab snapshot of parse flags */ - s->s_mn_parseflags_sending = s->s_mn_parseflags; - s->s_mn_parseflags = 0; - - mutex_exit(SETMUTEX(s->s_setno)); - - /* - * Send the message to the slaves to re-parse - * the indicated portions of the mddb. Send the status - * of the 50 mddbs in this set so that slaves know which - * mddbs that the master node thinks are 'good'. - * Otherwise, slave may reparse, but from wrong replica. - */ - mddb_parse_msg->msg_parse_flags = s->s_mn_parseflags_sending; - for (i = 0; i < MDDB_NLB; i++) { - mddb_parse_msg->msg_lb_flags[i] = - lbp->lb_locators[i].l_flags; - } - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - while (rval != 0) { - rval = mdmn_ksend_message(s->s_setno, - MD_MN_MSG_MDDB_PARSE, 0, 0, - (char *)mddb_parse_msg, - sizeof (md_mn_msg_mddb_parse_t), kresult); - if (rval != 0) - cmn_err(CE_WARN, "mddb_setexit: Unable to send " - "mddb update message to other nodes in " - "diskset %s\n", s->s_setname); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - - /* - * Re-grab mutex to clear sending field and to - * see if another parse message needs to be generated. - */ - mutex_enter(SETMUTEX(s->s_setno)); - s->s_mn_parseflags_sending = 0; - } - kmem_free(mddb_parse_msg, sizeof (md_mn_msg_mddb_parse_t)); - mutex_exit(SETMUTEX(s->s_setno)); -} - -static void -mddb_setexit_no_parse( - mddb_set_t *s -) -{ - mutex_exit(SETMUTEX(s->s_setno)); -} - -uint_t -mddb_lb_did_convert(mddb_set_t *s, uint_t doit, uint_t *blk_cnt) -{ - uint_t li; - mddb_lb_t *lbp = s->s_lbp; - mddb_locator_t *lp; - ddi_devid_t ret_devid; - uint_t devid_len; - dev_t ddi_dev; - mddb_did_ic_t *did_icp; - mddb_did_blk_t *did_blkp; - char *minor_name; - size_t sz; - int retval; - int err; - md_dev64_t dev64; /* tmp var to make code look better */ - - - /* Need disk block(s) to hold mddb_did_blk_t */ - *blk_cnt = MDDB_DID_BLOCKS; - - if (doit) { - /* - * Alloc mddb_did_blk_t disk block and fill in header area. - * Don't fill in did magic number until end of routine so - * if machine panics in the middle of conversion, the - * device id information will be thrown away at the - * next snarfing of this set. - * Need to set DEVID_STYLE so that mddb_devid_add will - * function properly. - */ - /* grab the mutex */ - if ((mddb_setenter(s->s_setno, MDDB_NOINIT, &err)) == NULL) { - return (1); - } - single_thread_start(s); - lbp->lb_didfirstblk = getfreeblks(s, MDDB_DID_BLOCKS); - if (lbp->lb_didfirstblk == 0) { - single_thread_end(s); - mddb_setexit(s); - return (1); - } - lbp->lb_didblkcnt = (mddb_block_t)MDDB_DID_BLOCKS; - did_icp = (mddb_did_ic_t *)kmem_zalloc(sizeof (mddb_did_ic_t), - KM_SLEEP); - did_blkp = (mddb_did_blk_t *)kmem_zalloc(MDDB_DID_BYTES, - KM_SLEEP); - - did_blkp->blk_revision = MDDB_REV_DI; - did_icp->did_ic_blkp = did_blkp; - s->s_did_icp = did_icp; - lbp->lb_flags |= MDDB_DEVID_STYLE; - } - - /* Fill in information in mddb_did_info_t array */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - - dev64 = md_xlate_targ_2_mini(md_expldev(lp->l_dev)); - ddi_dev = md_dev64_to_dev(dev64); - if (ddi_dev == NODEV) { - /* - * No translation available for replica. - * Could fail conversion to device id replica, - * but instead will just continue with next - * replica in list. - */ - continue; - } - if (ddi_lyr_get_devid(ddi_dev, &ret_devid) == DDI_SUCCESS) { - /* - * Just count each devid as at least 1 block. This - * is conservative since several device id's may fit - * into 1 disk block, but it's better to overestimate - * the number of blocks needed than to underestimate. - */ - devid_len = (int)ddi_devid_sizeof(ret_devid); - *blk_cnt += btodb(devid_len + (MDDB_BSIZE - 1)); - if (doit) { - if (ddi_lyr_get_minor_name(ddi_dev, S_IFBLK, - &minor_name) == DDI_SUCCESS) { - if (mddb_devid_add(s, li, ret_devid, - minor_name)) { - cmn_err(CE_WARN, - "Not enough space in metadb" - " to add device id for" - " dev: major = %d, " - "minor = %d\n", - getmajor(ddi_dev), - getminor(ddi_dev)); - } - sz = strlen(minor_name) + 1; - kmem_free(minor_name, sz); - } - } - ddi_devid_free(ret_devid); - } - } - - if (doit) { - did_blkp->blk_magic = MDDB_MAGIC_DI; - retval = push_lb(s); - (void) upd_med(s, "mddb_lb_did_convert(0)"); - single_thread_end(s); - mddb_setexit(s); - if (retval != 0) - return (1); - } - - return (0); -} - -static mddb_set_t * -init_set( - mddb_config_t *cp, - int flag, - int *errp -) -{ - mddb_set_t *s; - char *setname = NULL; - set_t setno = MD_LOCAL_SET; - side_t sideno = 0; - struct timeval32 *created = NULL; - - if (cp != NULL) { - setname = cp->c_setname; - setno = cp->c_setno; - sideno = cp->c_sideno; - created = &cp->c_timestamp; - } - - if (setno >= MD_MAXSETS) - return ((mddb_set_t *)NULL); - - if (md_set[setno].s_db) - return (mddb_setenter(setno, flag, errp)); - - s = (mddb_set_t *)kmem_zalloc(sizeof (*s), KM_SLEEP); - - cv_init(&s->s_buf_cv, NULL, CV_DEFAULT, NULL); - cv_init(&s->s_single_thread_cv, NULL, CV_DEFAULT, NULL); - cv_init(&s->s_optqueuing_cv, NULL, CV_DEFAULT, NULL); - cv_init(&s->s_opthungerr_cv, NULL, CV_DEFAULT, NULL); - cv_init(&s->s_optwantlck_cv, NULL, CV_DEFAULT, NULL); - - s->s_setno = setno; - s->s_sideno = sideno; - if (setno == MD_LOCAL_SET) { - (void) snprintf(s->s_ident.serial, sizeof (s->s_ident.serial), - "%u", zone_get_hostid(NULL)); - } else { - s->s_ident.createtime = *created; - s->s_setname = (char *)kmem_alloc(strlen(setname) + 1, - KM_SLEEP); - (void) strcpy(s->s_setname, setname); - } - - /* have a config struct, copy mediator information */ - if (cp != NULL) - s->s_med = cp->c_med; /* structure assignment */ - - md_set[setno].s_db = (void *) s; - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_TAKEOVER, SVM_TAG_SET, setno, NODEV64); - - return (mddb_setenter(setno, flag, errp)); -} - -void -mddb_unload_set( - set_t setno -) -{ - - mddb_set_t *s; - mddb_db_t *dbp, *adbp = NULL; - mddb_de_ic_t *dep, *dep2; - mddb_bf_t *bfp; - int i; - md_dev64_t dev; - - if ((s = mddb_setenter(setno, MDDB_NOINIT, NULL)) == NULL) - return; - - single_thread_start(s); - - s->s_opthavequeuinglck = 0; - s->s_optwantqueuinglck = 0; - - for (dbp = s->s_dbp; dbp != 0; dbp = adbp) { - for (dep = dbp->db_firstentry; dep != NULL; dep = dep2) { - if (dep->de_rb_userdata != NULL) { - if (dep->de_icreqsize) - kmem_free(dep->de_rb_userdata_ic, - dep->de_icreqsize); - else - kmem_free(dep->de_rb_userdata, - dep->de_reqsize); - } - kmem_free((caddr_t)dep->de_rb, dep->de_recsize); - dep2 = dep->de_next; - kmem_free((caddr_t)dep, sizeofde(dep)); - } - adbp = dbp->db_next; - kmem_free((caddr_t)dbp, sizeof (mddb_db_t)); - } - s->s_dbp = (mddb_db_t *)NULL; - - free_rip(&s->s_rip); - - for (i = 0; i < mddb_maxcopies; i++) { - if (! s->s_mbiarray) - break; - - if (! s->s_mbiarray[i]) - continue; - - dev = md_expldev(s->s_lbp->lb_locators[i].l_dev); - dev = md_xlate_targ_2_mini(dev); - if (dev != NODEV64) - mddb_devclose(dev); - - free_mbipp(&s->s_mbiarray[i]); - } - - if (s->s_mbiarray) { - kmem_free((caddr_t)s->s_mbiarray, - sizeof (mddb_mb_ic_t *) * mddb_maxcopies); - s->s_mbiarray = (mddb_mb_ic_t **)NULL; - } - - if (s->s_lnp) { - kmem_free((caddr_t)s->s_lnp, dbtob(s->s_lbp->lb_lnblkcnt)); - s->s_lnp = (mddb_ln_t *)NULL; - } - - if (s->s_lbp) { - mddb_devid_icp_free(&s->s_did_icp, s->s_lbp); - kmem_free((caddr_t)s->s_lbp, dbtob(s->s_lbp->lb_blkcnt)); - s->s_lbp = (mddb_lb_t *)NULL; - } - - if (s->s_freebitmap) { - kmem_free((caddr_t)s->s_freebitmap, s->s_freebitmapsize); - s->s_freebitmap = NULL; - s->s_freebitmapsize = 0; - } - - while ((bfp = allocbuffer(s, MDDB_NOSLEEP)) != NULL) - kmem_free((caddr_t)bfp, sizeof (*bfp)); - - if (s->s_databuffer_size) { - kmem_free(s->s_databuffer, s->s_databuffer_size); - s->s_databuffer_size = 0; - } - - if (s->s_setname != NULL) - kmem_free((caddr_t)s->s_setname, strlen(s->s_setname)+1); - - /* Data tags not supported on MN sets. */ - if (!(md_get_setstatus(setno) & MD_SET_MNSET)) - dtl_freel(&s->s_dtlp); - - md_set[setno].s_db = NULL; - ASSERT(s->s_singlelockwanted == 0); - kmem_free(s, sizeof (mddb_set_t)); - - /* Take care of things setup in the md_set array */ - if (! (md_get_setstatus(setno) & MD_SET_KEEPTAG)) { - if (md_set[setno].s_dtp) { - kmem_free((caddr_t)md_set[setno].s_dtp, MDDB_DT_BYTES); - md_set[setno].s_dtp = NULL; - } - } - - md_clr_setstatus(setno, MD_SET_ACCOK | MD_SET_ACCEPT | - MD_SET_TAGDATA | MD_SET_USETAG | MD_SET_TOOFEW | MD_SET_STALE | - MD_SET_OWNERSHIP | MD_SET_BADTAG | MD_SET_CLRTAG | MD_SET_MNSET | - MD_SET_DIDCLUP | MD_SET_MNPARSE_BLK | MD_SET_MN_MIR_STATE_RC | - MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT); - - mutex_exit(SETMUTEX(setno)); -} - -/* - * returns 0 if name can be put into locator block - * returns 1 if locator block prefixes are all used - * - * Takes splitname (suffix, prefix, sideno) and - * stores it in the locator name structure. - * For traditional diskset, the sideno is the index into the suffixes - * array in the locator name structure. - * For the MN diskset, the sideno is the nodeid which can be any number, - * so the index passed in is the index into the mnsuffixes array - * in the locator structure. This index was computed by the - * routine checklocator which basically checked the locator block - * mnside locator structure. - */ -static int -splitname2locatorblock( - md_splitname *spn, - mddb_ln_t *lnp, - int li, - side_t sideno, - int index -) -{ - uchar_t i; - md_name_suffix *sn; - md_mnname_suffix_t *mnsn; - mddb_mnln_t *mnlnp; - - for (i = 0; i < MDDB_PREFIXCNT; i++) { - if (lnp->ln_prefixes[i].pre_len != SPN_PREFIX(spn).pre_len) - continue; - if (bcmp(lnp->ln_prefixes[i].pre_data, SPN_PREFIX(spn).pre_data, - SPN_PREFIX(spn).pre_len) == 0) - break; - } - if (i == MDDB_PREFIXCNT) { - for (i = 0; i < MDDB_PREFIXCNT; i++) { - if (lnp->ln_prefixes[i].pre_len == 0) - break; - } - if (i == MDDB_PREFIXCNT) - return (1); - bcopy(SPN_PREFIX(spn).pre_data, lnp->ln_prefixes[i].pre_data, - SPN_PREFIX(spn).pre_len); - lnp->ln_prefixes[i].pre_len = SPN_PREFIX(spn).pre_len; - } - - if (lnp->ln_revision == MDDB_REV_MNLN) { - /* If a MN diskset, use index */ - mnlnp = (mddb_mnln_t *)lnp; - mnsn = &mnlnp->ln_mnsuffixes[index][li]; - mnsn->mn_ln_sideno = sideno; - mnsn->mn_ln_suffix.suf_len = SPN_SUFFIX(spn).suf_len; - mnsn->mn_ln_suffix.suf_prefix = i; - bcopy(SPN_SUFFIX(spn).suf_data, - mnsn->mn_ln_suffix.suf_data, SPN_SUFFIX(spn).suf_len); - } else { - sn = &lnp->ln_suffixes[sideno][li]; - sn->suf_len = SPN_SUFFIX(spn).suf_len; - sn->suf_prefix = i; - bcopy(SPN_SUFFIX(spn).suf_data, sn->suf_data, - SPN_SUFFIX(spn).suf_len); - } - return (0); -} - -/* - * Find the locator name for the given sideno and convert the locator name - * information into a splitname structure. - */ -void -mddb_locatorblock2splitname( - mddb_ln_t *lnp, - int li, - side_t sideno, - md_splitname *spn -) -{ - int iprefix; - md_name_suffix *sn; - md_mnname_suffix_t *mnsn; - int i; - mddb_mnln_t *mnlnp; - - if (lnp->ln_revision == MDDB_REV_MNLN) { - mnlnp = (mddb_mnln_t *)lnp; - for (i = 0; i < MD_MNMAXSIDES; i++) { - mnsn = &mnlnp->ln_mnsuffixes[i][li]; - if (mnsn->mn_ln_sideno == sideno) - break; - } - if (i == MD_MNMAXSIDES) - return; - - SPN_SUFFIX(spn).suf_len = mnsn->mn_ln_suffix.suf_len; - bcopy(mnsn->mn_ln_suffix.suf_data, SPN_SUFFIX(spn).suf_data, - SPN_SUFFIX(spn).suf_len); - iprefix = mnsn->mn_ln_suffix.suf_prefix; - } else { - sn = &lnp->ln_suffixes[sideno][li]; - SPN_SUFFIX(spn).suf_len = sn->suf_len; - bcopy(sn->suf_data, SPN_SUFFIX(spn).suf_data, - SPN_SUFFIX(spn).suf_len); - iprefix = sn->suf_prefix; - } - SPN_PREFIX(spn).pre_len = lnp->ln_prefixes[iprefix].pre_len; - bcopy(lnp->ln_prefixes[iprefix].pre_data, SPN_PREFIX(spn).pre_data, - SPN_PREFIX(spn).pre_len); -} - -static int -getdeldev( - mddb_config_t *cp, - int command, - md_error_t *ep -) -{ - mddb_set_t *s; - mddb_lb_t *lbp; - mddb_locator_t *locators; - uint_t loccnt; - mddb_mb_ic_t *mbip; - mddb_block_t blk; - int err = 0; - int i, j; - int li; - uint_t commitcnt; - set_t setno = cp->c_setno; - uint_t set_status; - md_dev64_t dev; - int flags = MDDB_MUSTEXIST; - mddb_ri_t *rip; - - cp->c_dbmax = MDDB_NLB; - - /* - * Data checking - */ - if (setno >= md_nsets || cp->c_id < 0 || - cp->c_id > cp->c_dbmax) { - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - } - - if (cp->c_flags & MDDB_C_STALE) - flags |= MDDB_MN_STALE; - - if ((s = mddb_setenter(setno, flags, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - cp->c_flags = 0; - - lbp = s->s_lbp; - loccnt = lbp->lb_loccnt; - locators = lbp->lb_locators; - - /* shorthand */ - set_status = md_get_setstatus(setno); - - if (set_status & MD_SET_STALE) - cp->c_flags |= MDDB_C_STALE; - - if (set_status & MD_SET_TOOFEW) - cp->c_flags |= MDDB_C_TOOFEW; - - cp->c_sideno = s->s_sideno; - - cp->c_dbcnt = 0; - /* - * go through and count active entries - */ - for (i = 0; i < loccnt; i++) { - if (locators[i].l_flags & MDDB_F_DELETED) - continue; - cp->c_dbcnt++; - } - - /* - * add the ability to accept a locator block index - * which is not relative to previously deleted replicas. This - * is for support of MD_DEBUG=STAT in metastat since it asks for - * replica information specifically for each of the mirror resync - * records. MDDB_CONFIG_SUBCMD uses one of the pad spares in - * the mddb_config_t type. - */ - if (cp->c_subcmd == MDDB_CONFIG_ABS) { - if (cp->c_id < 0 || cp->c_id > cp->c_dbmax) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_INVALID, NODEV32, - setno)); - } - li = cp->c_id; - } else { - if (cp->c_id >= cp->c_dbcnt) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_INVALID, NODEV32, - setno)); - } - - /* CSTYLED */ - for (li = 0, j = 0; /* void */; li++) { - if (locators[li].l_flags & MDDB_F_DELETED) - continue; - j++; - if (j > cp->c_id) - break; - } - } - - if (command == MDDB_ENDDEV) { - daddr_t ib = 0, jb; - - blk = 0; - if ((s != NULL) && s->s_mbiarray[li]) { - mbip = s->s_mbiarray[li]; - while ((jb = getphysblk(blk++, mbip)) > 0) { - if (jb > ib) - ib = jb; - } - cp->c_dbend = (int)ib; - } else { - cp->c_dbend = 0; - } - } - - locator2cfgloc(lbp, &cp->c_locator, li, s->s_sideno, s->s_did_icp); - mddb_locatorblock2splitname(s->s_lnp, li, s->s_sideno, &cp->c_devname); - - if (command != MDDB_DELDEV) { - mddb_setexit(s); - return (0); - } - - /* Currently don't allow addition/deletion of sides during upgrade */ - if (MD_UPGRADE) { - cmn_err(CE_WARN, - "Deletion of replica not allowed during upgrade.\n"); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - - /* - * If here, replica delete in progress. - */ - single_thread_start(s); - - if ((! (locators[li].l_flags & MDDB_F_EMASTER)) && - (locators[li].l_flags & MDDB_F_ACTIVE)) { - commitcnt = lbp->lb_commitcnt; - lbp->lb_commitcnt = 0; - setidentifier(s, &lbp->lb_ident); - crcgen(lbp, &lbp->lb_checksum, dbtob(lbp->lb_blkcnt), NULL); - /* - * Don't need to write out device id area, since locator - * block on this replica is being deleted by setting the - * commitcnt to 0. - */ - (void) writeblks(s, (caddr_t)lbp, 0, lbp->lb_blkcnt, li, - MDDB_WR_ONLY_MASTER); - lbp->lb_commitcnt = commitcnt; - } - - if (s->s_mbiarray[li]) { - /* A freed mbi pointer still exists in the mddb_ri_t */ - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (rip->ri_mbip == s->s_mbiarray[li]) - rip->ri_mbip = NULL; - } - free_mbipp(&s->s_mbiarray[li]); - } - - if (! (locators[li].l_flags & MDDB_F_EMASTER)) { - dev = md_expldev(locators[li].l_dev); - dev = md_xlate_targ_2_mini(dev); - if (dev != NODEV64) - mddb_devclose(dev); - } - - s->s_mbiarray[li] = 0; - lbp->lb_locators[li].l_flags = MDDB_F_DELETED; - - /* Only support data tags for traditional and local sets */ - if ((md_get_setstatus(setno) & MD_SET_STALE) && - (!(lbp->lb_flags & MDDB_MNSET)) && - setno != MD_LOCAL_SET) - if (set_dtag(s, ep)) - mdclrerror(ep); - - /* Write data tags to all accessible devices */ - /* Only support data tags for traditional and local sets */ - if (!(lbp->lb_flags & MDDB_MNSET)) { - (void) dt_write(s); - } - - /* Delete device id of deleted replica */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - (void) mddb_devid_delete(s, li); - } - /* write new locator to all devices */ - err = writelocall(s); - - (void) upd_med(s, "getdeldev(0)"); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_REPLICA, setno, - md_expldev(locators[li].l_dev)); - - computefreeblks(s); /* recompute always it may be larger */ - cp->c_dbcnt--; - err |= fixoptrecords(s); - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDDB_E_NOTNOW, NODEV32, setno)); - } - } - - single_thread_end(s); - mddb_setexit(s); - return (0); -} - -static int -getdriver( - mddb_cfg_loc_t *clp -) -{ - major_t majordev; - - /* - * Data checking - */ - if (clp->l_dev <= 0) - return (EINVAL); - - majordev = getmajor(expldev(clp->l_dev)); - - if (ddi_major_to_name(majordev) == (char *)NULL) - return (EINVAL); - - if (MD_UPGRADE) - (void) strcpy(clp->l_driver, md_targ_major_to_name(majordev)); - else - (void) strcpy(clp->l_driver, ddi_major_to_name(majordev)); - return (0); -} - -/* - * update_valid_replica - updates the locator block namespace (prefix - * and/or suffix) with new pathname and devname. - * RETURN - * 1 Error - * 0 Success - */ -static int -update_valid_replica( - side_t side, - mddb_locator_t *lp, - mddb_set_t *s, - int li, - char *devname, - char *pathname, - md_dev64_t devt -) -{ - uchar_t pre_len, suf_len; - md_name_suffix *sn; - mddb_ln_t *lnp; - uchar_t pre_index; - uchar_t i; - - if (md_expldev(lp->l_dev) != devt) { - return (0); - } - - if (pathname[strlen(pathname) - 1] == '/') - pathname[strlen(pathname) - 1] = '\0'; - - pre_len = (uchar_t)strlen(pathname); - suf_len = (uchar_t)strlen(devname); - - if ((pre_len > MD_MAXPREFIX) || (suf_len > MD_MAXSUFFIX)) - return (1); - - lnp = s->s_lnp; - - /* - * Future note: Need to do something here for the MN diskset case - * when device ids are supported in disksets. - * Can't add until merging devids_in_diskset code into code base - * Currently only called with side of 0. - */ - - sn = &lnp->ln_suffixes[side][li]; - - /* - * Check if prefix (Ex: /dev/dsk) needs to be changed. - * If new prefix is the same as the previous prefix - no change. - * - * If new prefix is not the same, check if new prefix - * matches an existing one. If so, use that one. - * - * If new prefix doesn't exist, add a new prefix. If not enough - * space, return failure. - */ - pre_index = sn->suf_prefix; - /* Check if new prefix is the same as the old prefix. */ - if ((lnp->ln_prefixes[pre_index].pre_len != pre_len) || - (bcmp(lnp->ln_prefixes[pre_index].pre_data, pathname, - pre_len) != 0)) { - /* Check if new prefix is an already known prefix. */ - for (i = 0; i < MDDB_PREFIXCNT; i++) { - if (lnp->ln_prefixes[i].pre_len != pre_len) { - continue; - } - if (bcmp(lnp->ln_prefixes[i].pre_data, pathname, - pre_len) == 0) { - break; - } - } - /* If no match found for new prefix - add the new prefix */ - if (i == MDDB_PREFIXCNT) { - for (i = 0; i < MDDB_PREFIXCNT; i++) { - if (lnp->ln_prefixes[i].pre_len == 0) - break; - } - /* No space to add new prefix - return failure */ - if (i == MDDB_PREFIXCNT) { - return (1); - } - bcopy(pathname, lnp->ln_prefixes[i].pre_data, pre_len); - lnp->ln_prefixes[i].pre_len = pre_len; - } - sn->suf_prefix = i; - } - - /* Now, update the suffix (Ex: c0t0d0s0) if needed */ - if ((sn->suf_len != suf_len) || - (bcmp(sn->suf_data, devname, suf_len) != 0)) { - bcopy(devname, sn->suf_data, suf_len); - sn->suf_len = suf_len; - } - return (0); -} - - -/* - * md_update_locator_namespace - If in devid style and active and the devid's - * exist and are valid update the locator namespace pathname - * and devname. - * RETURN - * 1 Error - * 0 Success - */ -int -md_update_locator_namespace( - set_t setno, /* which set to get name from */ - side_t side, - char *dname, - char *pname, - md_dev64_t devt -) -{ - mddb_set_t *s; - mddb_lb_t *lbp; - int li; - uint_t flg; - int err = 0; - mddb_ln_t *lnp; - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (1); - single_thread_start(s); - lbp = s->s_lbp; - /* must be DEVID_STYLE */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - - if (lp->l_flags & MDDB_F_DELETED) { - continue; - } - - /* replica also must be active */ - if (lp->l_flags & MDDB_F_ACTIVE) { - flg = s->s_did_icp->did_ic_blkp-> - blk_info[li].info_flags; - /* only update if did exists and is valid */ - if ((flg & MDDB_DID_EXISTS) && - (flg & MDDB_DID_VALID)) { - if (update_valid_replica(side, lp, s, - li, dname, pname, devt)) { - err = 1; - goto out; - } - } - } - } - } - lnp = s->s_lnp; - uniqtime32(&lnp->ln_timestamp); - if (lbp->lb_flags & MDDB_MNSET) - lnp->ln_revision = MDDB_REV_MNLN; - else - lnp->ln_revision = MDDB_REV_LN; - crcgen(lnp, &lnp->ln_checksum, dbtob(lbp->lb_lnblkcnt), NULL); - err = writeall(s, (caddr_t)lnp, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, 0); - /* - * If a MN diskset and this is the master, set the PARSE_LOCNM - * flag in the mddb_set structure to show that the locator - * names have changed. - */ - - if ((lbp->lb_flags & MDDB_MNSET) && - (md_set[s->s_setno].s_am_i_master)) { - s->s_mn_parseflags |= MDDB_PARSE_LOCNM; - } -out: - single_thread_end(s); - mddb_setexit(s); - if (err) - return (1); - return (0); -} - -/* - * update_locatorblock - for active entries in the locator block, check - * the devt to see if it matches the given devt. If so, and - * there is an associated device id which is not the same - * as the passed in devid, delete old devid and add a new one. - * - * During import of replicated disksets, old_didptr contains - * the original disk's device id. Use this device id in - * addition to the devt to determine if an entry is a match - * and should be updated with the new device id of the - * replicated disk. Specifically, this is the case being handled: - * - * Original_disk Replicated_disk Disk_Available_During_Import - * c1t1d0 c1t3d0 no - so old name c1t1d0 shown - * c1t2d0 c1t1d0 yes - name is c1t1d0 - * c1t3d0 c1t2d0 yes - name is c1t2d0 - * - * Can't just match on devt since devt for the first and third - * disks will be the same, but the original disk's device id - * is known and can be used to distinguish which disk's - * replicated device id should be updated. - * RETURN - * MDDB_E_NODEVID - * MDDB_E_NOLOCBLK - * 1 Error - * 0 Success - */ -static int -update_locatorblock( - mddb_set_t *s, - md_dev64_t dev, - ddi_devid_t didptr, - ddi_devid_t old_didptr -) -{ - mddb_lb_t *lbp = NULL; - mddb_locator_t *lp; - int li; - uint_t flg; - ddi_devid_t devid_ptr; - int retval = 0; - char *minor_name; - int repl_import_flag; - - /* Set replicated flag if this is a replicated import */ - repl_import_flag = md_get_setstatus(s->s_setno) & - MD_SET_REPLICATED_IMPORT; - - lbp = s->s_lbp; - /* find replicas that haven't been deleted */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - - if ((lp->l_flags & MDDB_F_DELETED)) { - continue; - } - /* - * check to see if locator devt matches given dev - * and if there is a device ID associated with it - */ - flg = s->s_did_icp->did_ic_blkp-> blk_info[li].info_flags; - if ((md_expldev(lp->l_dev) == dev) && - (flg & MDDB_DID_EXISTS)) { - if (flg & MDDB_DID_VALID) { - continue; /* cont to nxt active entry */ - } - devid_ptr = s->s_did_icp->did_ic_devid[li]; - if (devid_ptr == NULL) { - return (MDDB_E_NODEVID); - } - - /* - * During a replicated import the old_didptr - * must match the current devid before the - * devid can be updated. - */ - if (repl_import_flag) { - if (ddi_devid_compare(devid_ptr, - old_didptr) != 0) - continue; - } - - if (ddi_devid_compare(devid_ptr, didptr) != 0) { - /* - * devid's not equal so - * delete and add - */ - if (ddi_lyr_get_minor_name( - md_dev64_to_dev(dev), - S_IFBLK, &minor_name) == DDI_SUCCESS) { - (void) mddb_devid_delete(s, li); - (void) mddb_devid_add(s, li, didptr, - minor_name); - kmem_free(minor_name, - strlen(minor_name)+1); - break; - } else { - retval = 1; - goto err_out; - } - } - } - } /* end for */ - retval = push_lb(s); - (void) upd_med(s, "update_locatorblock(0)"); -err_out: - return (retval); -} - -static int -update_mb_devid( - mddb_set_t *s, - mddb_ri_t *rip, - ddi_devid_t devidptr -) -{ - mddb_mb_ic_t *mbip; - mddb_mb_t *mb = NULL; - daddr_t blkno; - md_dev64_t device; - uint_t sz; - int mb2free = 0; - int err = 0; - - - /* - * There is case where a disk may not have mddb, - * and only has dummy mddb which contains - * a valid devid we like to update and in this - * case, the rip_lbp will be NULL but we still - * like to update the devid embedded in the - * dummy mb block. - * - */ - if (rip->ri_mbip != (mddb_mb_ic_t *)NULL) { - mbip = rip->ri_mbip; - mb = &mbip->mbi_mddb_mb; - } else { - /* - * Done if it is non-replicated set - */ - if (devidptr != (ddi_devid_t)NULL) { - mb = (mddb_mb_t *)kmem_zalloc(MDDB_BSIZE, - KM_SLEEP); - mb->mb_magic = MDDB_MAGIC_DU; - mb->mb_revision = MDDB_REV_MB; - mb2free = 1; - } else { - goto out; - } - } - - blkno = rip->ri_blkno; - device = rip->ri_dev; - /* - * Replace the mb_devid with the new/valid one - */ - if (devidptr != (ddi_devid_t)NULL) { - /* - * Zero out what we have previously - */ - if (mb->mb_devid_len) - bzero(mb->mb_devid, mb->mb_devid_len); - sz = ddi_devid_sizeof(devidptr); - bcopy((char *)devidptr, (char *)mb->mb_devid, sz); - mb->mb_devid_len = sz; - } - - mb->mb_setno = s->s_setno; - uniqtime32(&mb->mb_timestamp); - crcgen(mb, &mb->mb_checksum, MDDB_BSIZE, NULL); - /* - * putblks will - * - * - drop the s_dbmx lock - * - biowait - * - regain the s_dbmx lock - * - * Need to update this if we wants to handle - * mb_next != NULL which it is unlikely will happen - */ - err = putblks(s, (caddr_t)mb, blkno, 1, device, 0); - - if (mb2free) { - kmem_free(mb, MDDB_BSIZE); - } -out: - return (err); -} - -static int -setdid( - mddb_config_t *cp -) -{ - ddi_devid_t devidp; - dev_t ddi_dev; - mddb_set_t *s; - int err = 0; - mddb_ri_t *rip; - - /* - * Data integrity check - */ - if (cp->c_setno >= md_nsets || cp->c_devt <= 0) - return (EINVAL); - - if ((md_get_setstatus(cp->c_setno) & MD_SET_STALE)) - return (0); - - ddi_dev = md_dev64_to_dev(cp->c_devt); - if (ddi_lyr_get_devid(ddi_dev, &devidp) != DDI_SUCCESS) { - return (-1); - } - if (devidp == NULL) { - return (-1); - } - - if ((s = mddb_setenter(cp->c_setno, MDDB_MUSTEXIST, &err)) == NULL) - return (-1); - single_thread_start(s); - - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (rip->ri_lbp == (mddb_lb_t *)NULL) - continue; - /* - * We only update what is asked - */ - if (rip->ri_dev == cp->c_devt) { - if (update_mb_devid(s, rip, devidp) != 0) { - err = -1; - goto out; - } - } - } - - if (update_locatorblock(s, cp->c_devt, devidp, NULL)) { - err = -1; - goto out; - } - -out: - single_thread_end(s); - mddb_setexit(s); - ddi_devid_free(devidp); - return (err); -} - -static int -delnewside( - mddb_config_t *cp, - int command, - md_error_t *ep -) -{ - mddb_set_t *s; - int li; - mddb_lb_t *lbp; /* pointer to locator block */ - mddb_ln_t *lnp; /* pointer to locator names */ - mddb_mnln_t *mnlnp; /* pointer to locator names */ - mddb_locator_t *lp; - mddb_sidelocator_t *slp; - mddb_cfg_loc_t *clp; - int err = 0; - set_t setno = cp->c_setno; - ddi_devid_t devid; - ddi_devid_t ret_devid = NULL; - char *minor_name; - uint_t use_devid = 0; - dev_t ddi_dev; - md_mnname_suffix_t *mnsn; - mddb_mnlb_t *mnlbp; - mddb_mnsidelocator_t *mnslp; - - /* Currently don't allow addition/deletion of sides during upgrade */ - if (MD_UPGRADE) { - cmn_err(CE_WARN, - "Addition and deletion of sides not allowed" - " during upgrade. \n"); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - - /* - * Data integrity check - */ - if (setno >= md_nsets || cp->c_locator.l_dev <= 0) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - single_thread_start(s); - clp = &cp->c_locator; - - lbp = s->s_lbp; - - if (lbp->lb_setno != setno) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_INVALID, NODEV32, setno)); - } - - /* - * Find this device/blkno pair - */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - ddi_dev = md_dev64_to_dev(clp->l_dev); - if ((ddi_lyr_get_devid(ddi_dev, &ret_devid) == DDI_SUCCESS) && - (ddi_lyr_get_minor_name(ddi_dev, S_IFBLK, &minor_name) - == DDI_SUCCESS)) { - if (strlen(minor_name) < MDDB_MINOR_NAME_MAX) { - clp->l_devid = (uint64_t)(uintptr_t)ret_devid; - use_devid = 1; - (void) strcpy(clp->l_minor_name, minor_name); - } - kmem_free(minor_name, strlen(minor_name)+1); - } - if (use_devid != 1 && ret_devid != NULL) - ddi_devid_free(ret_devid); - } - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (use_devid) { - if ((mddb_devid_get(s, li, &devid, &minor_name)) == 0) - continue; - if ((ddi_devid_compare(devid, - (ddi_devid_t)(uintptr_t)clp->l_devid) == 0) && - (strcmp(clp->l_minor_name, minor_name) == 0) && - ((daddr_t)lp->l_blkno == clp->l_blkno)) { - break; - } - } else { - if (lp->l_dev == clp->l_dev && - (daddr_t)lp->l_blkno == clp->l_blkno) { - break; - } - } - } - - if (li == lbp->lb_loccnt) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_INVALID, NODEV32, setno)); - } - - lnp = s->s_lnp; - if (command == MDDB_NEWSIDE) { - int index = 0; - /* - * If a MN diskset, need to find the index where the new - * locator information is to be stored in the mnsidelocator - * field of the locator block so that the locator name can - * be stored at the same array index in the mnsuffixes - * field of the locator names structure. - */ - if (lbp->lb_flags & MDDB_MNSET) { - if ((index = checklocator(lbp, li, - cp->c_sideno)) == -1) { - if (use_devid) { - ddi_devid_free((ddi_devid_t) - (uintptr_t)clp->l_devid); - } - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, - NODEV32, setno)); - } - } - - /* - * Store the locator name before the sidelocator information - * in case a panic occurs between these 2 steps. Must have - * the locator name information in order to print reasonable - * error information. - */ - if (splitname2locatorblock(&cp->c_devname, lnp, li, - cp->c_sideno, index)) { - if (use_devid) - ddi_devid_free( - (ddi_devid_t)(uintptr_t)clp->l_devid); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, NODEV32, - setno)); - } - - if (cfgloc2locator(lbp, clp, li, cp->c_sideno, index)) { - if (use_devid) - ddi_devid_free( - (ddi_devid_t)(uintptr_t)clp->l_devid); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, NODEV32, - setno)); - } - } - - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - - if (command == MDDB_DELSIDE) { - int i; - for (i = 0; i < lbp->lb_loccnt; i++) { - if (lbp->lb_flags & MDDB_MNSET) { - int j; - mnlbp = (mddb_mnlb_t *)lbp; - for (j = 0; j < MD_MNMAXSIDES; j++) { - mnslp = &mnlbp->lb_mnsidelocators[j][i]; - if (mnslp->mnl_sideno == cp->c_sideno) - break; - } - if (j < MD_MNMAXSIDES) { - mnslp->mnl_mnum = NODEV32; - mnslp->mnl_sideno = 0; - mnlnp = (mddb_mnln_t *)lnp; - mnsn = &(mnlnp->ln_mnsuffixes[j][i]); - bzero((caddr_t)mnsn, - sizeof (md_mnname_suffix_t)); - } - } else { - slp = &lbp->lb_sidelocators[cp->c_sideno][i]; - bzero((caddr_t)&lnp->ln_suffixes - [cp->c_sideno][i], sizeof (md_name_suffix)); - slp->l_mnum = NODEV32; - } - } - } - - /* write new locator names to all devices */ - uniqtime32(&lnp->ln_timestamp); - if (lbp->lb_flags & MDDB_MNSET) - lnp->ln_revision = MDDB_REV_MNLN; - else - lnp->ln_revision = MDDB_REV_LN; - crcgen(lnp, &lnp->ln_checksum, dbtob(lbp->lb_lnblkcnt), NULL); - err |= writeall(s, (caddr_t)lnp, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, 0); - /* - * If a MN diskset and this is the master, set the PARSE_LOCNM - * flag in the mddb_set structure to show that the locator - * names have changed. - */ - - if ((lbp->lb_flags & MDDB_MNSET) && - (md_set[s->s_setno].s_am_i_master)) { - s->s_mn_parseflags |= MDDB_PARSE_LOCNM; - } - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - } - - uniqtime32(&lbp->lb_timestamp); - /* write new locator to all devices */ - err = writelocall(s); - - (void) upd_med(s, "delnewside(0)"); - - computefreeblks(s); /* recompute always it may be larger */ - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - } - - single_thread_end(s); - mddb_setexit(s); - - return (0); -} - -static int -newdev( - mddb_config_t *cp, - int command, - md_error_t *ep -) -{ - mddb_set_t *s; - mddb_mb_ic_t *mbip, *mbip1; - int i, j; - int li; - mddb_lb_t *lbp; /* pointer to locator block */ - mddb_ln_t *lnp; /* pointer to locator names */ - mddb_locator_t *lp; - mddb_cfg_loc_t *clp; - int err = 0; - set_t setno = cp->c_setno; - ddi_devid_t devid2; - ddi_devid_t ret_devid = NULL; - char *minor_name; - uint_t use_devid = 0; - dev_t ddi_dev; - int old_flags; - int flags; - int mn_set = 0; - int index; - mddb_ri_t *rip; - int locator_deleted = 0; - dev32_t locator_deleted_dev; - int sz = 0; - - - /* Currently don't allow addition of new replica during upgrade */ - if (MD_UPGRADE) { - cmn_err(CE_WARN, - "Addition of new replica not allowed during upgrade.\n"); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - - /* - * Data integrity check - */ - if (setno >= md_nsets || cp->c_locator.l_dev <= 0) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - /* Determine the flag settings for multinode sets */ - flags = MDDB_NOOLDOK; - if (cp->c_multi_node) - flags |= MDDB_MULTINODE; - - if ((s = mddb_setenter(setno, flags, &err)) == NULL) { - if (err != MDDB_E_NOTOWNER) - return (mddbstatus2error(ep, err, NODEV32, setno)); - s = init_set(cp, flags, &err); - if (s == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - } - - single_thread_start(s); - - /* shorthand */ - clp = &cp->c_locator; - - /* shorthand */ - lbp = s->s_lbp; - - if (lbp->lb_setno != setno) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_INVALID, NODEV32, setno)); - } - - /* - * See if this device/blkno pair is already a replica - */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - ddi_dev = expldev(clp->l_dev); - if ((ddi_lyr_get_devid(ddi_dev, &ret_devid) == DDI_SUCCESS) && - (ddi_lyr_get_minor_name(ddi_dev, - S_IFBLK, &minor_name) == DDI_SUCCESS)) { - if (strlen(minor_name) < MDDB_MINOR_NAME_MAX) { - clp->l_devid = (uint64_t)(uintptr_t)ret_devid; - use_devid = 1; - (void) strcpy(clp->l_minor_name, minor_name); - } - kmem_free(minor_name, strlen(minor_name)+1); - } - if (use_devid != 1 && ret_devid != NULL) - ddi_devid_free(ret_devid); - } - - for (i = 0; i < lbp->lb_loccnt; i++) { - lp = &lbp->lb_locators[i]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (use_devid) { - if ((mddb_devid_get(s, i, &devid2, &minor_name)) == 0) - continue; - if ((ddi_devid_compare(devid2, - (ddi_devid_t)(uintptr_t)clp->l_devid) == 0) && - (strcmp(clp->l_minor_name, minor_name) == 0) && - ((daddr_t)lp->l_blkno == clp->l_blkno)) { - if (command == MDDB_NEWDEV) { - ddi_devid_free((ddi_devid_t)(uintptr_t) - clp->l_devid); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, - MDE_DB_EXISTS, NODEV32, setno)); - } - } - } else { - if (lp->l_dev == clp->l_dev && - (daddr_t)lp->l_blkno == clp->l_blkno) { - if (command == MDDB_NEWDEV) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, - MDE_DB_EXISTS, NODEV32, setno)); - } - } - } - } - - /* - * Really is a new replica, go get the master blocks - */ - mbip = getmasters(s, md_expldev(clp->l_dev), clp->l_blkno, - (uint_t *)0, &mn_set); - if (! mbip) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_MASTER, NODEV32, setno)); - } - - /* - * Compute free blocks in replica. - */ - computefreeblks(s); - - /* - * Check if this is large enough - */ - for (mbip1 = mbip, i = 0; mbip1 != NULL; mbip1 = mbip1->mbi_next) - i += mbip1->mbi_mddb_mb.mb_blkcnt; - for (j = i; j < s->s_totalblkcnt; j++) { - if (blkcheck(s, j)) { - while (mbip) { - mbip1 = mbip->mbi_next; - kmem_free((caddr_t)mbip, MDDB_IC_BSIZE); - mbip = mbip1; - } - if (use_devid) - ddi_devid_free( - (ddi_devid_t)(uintptr_t)clp->l_devid); - mddb_devclose(md_expldev(clp->l_dev)); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, NODEV32, - setno)); - } - } - - /* Look for a deleted slot */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) { - locator_deleted = 1; - locator_deleted_dev = lp->l_dev; - break; - } - } - - /* If no deleted slots, add a new one */ - if (li == lbp->lb_loccnt) { - /* Already have the max replicas, bail */ - if (lbp->lb_loccnt == MDDB_NLB) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t) - clp->l_devid); - mddb_devclose(md_expldev(clp->l_dev)); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32, - setno)); - } - lbp->lb_loccnt++; - lp = &lbp->lb_locators[li]; - } - - /* Initialize the new or deleted slot */ - old_flags = lp->l_flags; - lp->l_dev = clp->l_dev; - lp->l_blkno = (daddr32_t)clp->l_blkno; - lp->l_flags = clp->l_flags; - - /* shorthand */ - lnp = s->s_lnp; - - index = 0; - if ((lbp->lb_flags & MDDB_MNSET) || (flags & MDDB_MULTINODE)) { - /* - * If a MN diskset, need to find the index where the new - * locator information is to be stored in the mnsidelocator - * field of the locator block so that the locator name can - * be stored at the same array index in the mnsuffixes - * field of the locator names structure. - */ - lbp->lb_flags |= MDDB_MNSET; - if ((index = checklocator(lbp, li, s->s_sideno)) == -1) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp-> - l_devid); - lp->l_flags = old_flags; - lbp->lb_loccnt--; - mddb_devclose(md_expldev(clp->l_dev)); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, - NODEV32, setno)); - } - } - /* - * Store the locator name before the sidelocator information - * in case a panic occurs between these 2 steps. Must have - * the locator name information in order to print reasonable - * error information. - */ - if (splitname2locatorblock(&cp->c_devname, lnp, li, - s->s_sideno, index)) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - lp->l_flags = old_flags; - lbp->lb_loccnt--; - mddb_devclose(md_expldev(clp->l_dev)); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, NODEV32, setno)); - } - - /* - * Compute free blocks in replica before calling cfgloc2locator - * since cfgloc2locator may attempt to alloc an unused block - * to store the device id. - * mbiarray needs to be setup before calling computefreeblks. - */ - s->s_mbiarray[li] = mbip; - computefreeblks(s); - - if (cfgloc2locator(lbp, clp, li, s->s_sideno, index)) { - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - lp->l_flags = old_flags; - lbp->lb_loccnt--; - s->s_mbiarray[li] = 0; - mddb_devclose(md_expldev(clp->l_dev)); - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_TOOSMALL, NODEV32, setno)); - } - - /* - * Hijack a deleted rip master record and correct the contents - */ - if (locator_deleted) { - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (rip->ri_lbp != NULL && - rip->ri_mbip == 0 && - (rip->ri_dev == md_expldev(locator_deleted_dev))) { - rip->ri_dev = md_expldev(clp->l_dev); - rip->ri_mbip = mbip; - - if (use_devid && clp->l_devid != 0) { - sz = (int)ddi_devid_sizeof( - (ddi_devid_t)(uintptr_t) - clp->l_devid); - rip->ri_devid = - (ddi_devid_t)kmem_zalloc(sz, - KM_SLEEP); - bcopy((void *)(uintptr_t)clp->l_devid, - (char *)rip->ri_devid, sz); - } - - break; - } - } - } - - if (use_devid) - ddi_devid_free((ddi_devid_t)(uintptr_t)clp->l_devid); - - uniqtime32(&lbp->lb_timestamp); - lp->l_flags = MDDB_F_ACTIVE; - - /* write db copy to new device */ - err = writecopy(s, li, MDDB_WRITECOPY_ALL); - lp->l_flags |= MDDB_F_UP2DATE; - - /* write new locator names to all devices */ - uniqtime32(&lnp->ln_timestamp); - if (lbp->lb_flags & MDDB_MNSET) - lnp->ln_revision = MDDB_REV_MNLN; - else - lnp->ln_revision = MDDB_REV_LN; - crcgen(lnp, &lnp->ln_checksum, dbtob(lbp->lb_lnblkcnt), NULL); - err |= writeall(s, (caddr_t)lnp, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, 0); - /* - * If a MN diskset and this is the master, set the PARSE_LOCNM - * flag in the mddb_set structure to show that the locator - * names have changed. - */ - - if ((lbp->lb_flags & MDDB_MNSET) && - (md_set[s->s_setno].s_am_i_master)) { - s->s_mn_parseflags |= MDDB_PARSE_LOCNM; - } - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - } - - /* Data tags not supported on MN sets */ - if ((md_get_setstatus(setno) & MD_SET_STALE) && - (!(lbp->lb_flags & MDDB_MNSET)) && - setno != MD_LOCAL_SET) - if (set_dtag(s, ep)) - mdclrerror(ep); - - /* Write data tags to all accessible devices */ - /* Data tags not supported on MN sets */ - if (!(lbp->lb_flags & MDDB_MNSET)) { - (void) dt_write(s); - } - - /* write new locator to all devices */ - err = writelocall(s); - - (void) upd_med(s, "newdev(0)"); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_REPLICA, setno, - md_expldev(clp->l_dev)); - - computefreeblks(s); /* recompute always it may be smaller */ - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTNOW, NODEV32, setno)); - } - } - - single_thread_end(s); - mddb_setexit(s); - - return (0); -} - -#ifdef DEBUG -static void -mddb_check_set( - set_t setno -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - - if (! md_set[setno].s_db) - return; - - s = (mddb_set_t *)md_set[setno].s_db; - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - rbp = dep->de_rb; - ASSERT(rbp->rb_magic == MDDB_MAGIC_RB); - if (dep->de_rb_userdata) - ASSERT((uintptr_t)dep->de_rb_userdata > 2000); - } - } -} -#endif /* DEBUG */ - -/* - * Exported Entry Points - */ -#ifdef DEBUG -void -mddb_check(void) -{ - int i; - - for (i = 0; i < md_nsets; i++) { - if (! md_set[i].s_db) - return; - - mddb_check_set(i); - } - -} -#endif /* DEBUG */ - -int -mddb_configure( - mddb_cfgcmd_t command, - mddb_config_t *cp -) -{ - mddb_set_t *s; - md_error_t *ep = &cp->c_mde; - int flag = 0; - int err = 0; - set_t setno = cp->c_setno; - - mdclrerror(ep); - - switch (command) { - case MDDB_NEWDEV: - err = newdev(cp, command, ep); - break; - - case MDDB_NEWSIDE: - case MDDB_DELSIDE: - err = delnewside(cp, command, ep); - break; - - case MDDB_GETDEV: - case MDDB_DELDEV: - case MDDB_ENDDEV: - err = getdeldev(cp, command, ep); - break; - - case MDDB_GETDRVRNAME: - err = getdriver(&cp->c_locator); - break; - - case MDDB_USEDEV: - /* - * Note: must allow USEDEV ioctl during upgrade to - * support auto-take disksets. - * - * Also during the set import if the md_devid_destroy - * flag is set then error out - */ - - if ((cp->c_flags & MDDB_C_IMPORT) && md_devid_destroy) - return (mdmderror(ep, MDE_INVAL_UNIT, - MD_ADM_MINOR)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, - MD_ADM_MINOR)); - - if ((s = mddb_setenter(setno, MDDB_NOINIT, &err)) == - NULL) { - if ((s = init_set(cp, MDDB_NOINIT, &err)) == - NULL) { - err = mddbstatus2error(ep, err, - NODEV32, setno); - break; - } - } - if (setno == MD_LOCAL_SET) - flag = MDDB_F_IOCTL; - if (cp->c_locator.l_old_devid) { - md_set_setstatus(setno, - MD_SET_REPLICATED_IMPORT); - } - err = ridev(&s->s_rip, &cp->c_locator, NULL, flag); - mddb_setexit(s); - break; - - case MDDB_RELEASESET: - mutex_enter(&mddb_lock); - mddb_unload_set(cp->c_setno); - mutex_exit(&mddb_lock); - break; - - case MDDB_SETDID: - err = setdid(cp); - break; - - default: - err = mdmddberror(ep, MDE_DB_INVALID, NODEV32, - cp->c_setno); - } - - return (err); -} - -int -mddb_getoptloc( - mddb_optloc_t *ol -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - mddb_recid_t id; - set_t setno; - - ol->li[0] = -1; - ol->li[1] = -1; - - id = ol->recid; - setno = DBSET(id); - if (setno >= md_nsets) - return (EINVAL); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, NULL)) == NULL) - return (0); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - ol->li[0] = dep->de_optinfo[0].o_li; - ol->li[1] = dep->de_optinfo[1].o_li; - mddb_setexit(s); - return (0); - } - } - mddb_setexit(s); - return (0); -} - -void -mddb_init(void) -{ - mddb_set_t *s; - - mutex_init(&mddb_lock, NULL, MUTEX_DEFAULT, NULL); - if ((s = init_set(NULL, MDDB_NOINIT, NULL)) != NULL) - mddb_setexit(s); -} - - -void -mddb_unload(void) -{ - int i; - - mutex_enter(&mddb_lock); - - for (i = 0; i < md_nsets; i++) { - md_clr_setstatus(i, MD_SET_KEEPTAG); - mddb_unload_set(i); - } - - crcfreetab(); - - mutex_exit(&mddb_lock); -} - -mddb_recid_t -mddb_createrec( - size_t usersize, /* size of db record */ - mddb_type_t type, /* type1 of db record */ - uint_t type2, /* type2 of db record */ - md_create_rec_option_t options, /* options for this creation */ - set_t setno /* set number to create record in */ -) -{ - mddb_set_t *s; - mddb_db_t *dbp, *prevdbp, *newdbp; - mddb_db32_t *db32p; - mddb_de_ic_t *dep; - /* LINTED variable unused - used for sizeof calculations */ - mddb_de32_t *de32p; - mddb_rb32_t *rbp; - size_t recsize; - ulong_t blkcnt; - ulong_t maxblocks; - size_t desize, desize_ic; - size_t used; - mddb_recid_t newid; - caddr_t tmppnt; - int i, err = 0; - void *userdata; - uint_t flag_type; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_de_t) == sizeof (mddb_de32_t)); - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - /* - * everyone is supposed to sepcify if it's a - * 32 bit or a 64 bit record - */ - if ((options &(MD_CRO_32BIT|MD_CRO_64BIT)) == 0) { - return (MDDB_E_INVALID); - } - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - if (checkstate(s, MDDB_PROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - - recsize = roundup((sizeof (*rbp) - sizeof (rbp->rb_data)) + - usersize, MDDB_BSIZE); - blkcnt = btodb(recsize); - - if (mddb_maxblocks) - maxblocks = mddb_maxblocks; - else - maxblocks = (MDDB_BSIZE - (sizeof (*db32p) + sizeof (*de32p) - - sizeof (de32p->de32_blks))) / sizeof (mddb_block_t); - - if (blkcnt > maxblocks) { - mddb_setexit(s); - return (MDDB_E_INVALID); - } - /* - * allocate record block - * and new directory block so to avoid sleeping - * after starting single_thread - */ - rbp = (mddb_rb32_t *)kmem_zalloc(recsize, KM_SLEEP); - if ((options & MD_CRO_OPTIMIZE) == 0) - userdata = kmem_zalloc(usersize, KM_SLEEP); - newdbp = (mddb_db_t *)kmem_zalloc(sizeof (*newdbp), KM_SLEEP); - - /* - * if this is the largest record allocate new buffer for - * checkcopy(); - */ - if (recsize > s->s_databuffer_size) { - tmppnt = (caddr_t)kmem_zalloc(recsize, KM_SLEEP); - /* - * this test is incase when to sleep during kmem_alloc - * and some other task bumped max record size - */ - if (recsize > s->s_databuffer_size) { - if (s->s_databuffer_size) - kmem_free(s->s_databuffer, - s->s_databuffer_size); - s->s_databuffer = tmppnt; - s->s_databuffer_size = recsize; - } else { - kmem_free(tmppnt, recsize); - } - } - - single_thread_start(s); - - newid = 0; - do { - newid++; - if (DBID(newid) == 0) { - kmem_free((caddr_t)newdbp, sizeof (*newdbp)); - kmem_free((caddr_t)rbp, ((size_t)recsize)); - if ((options & MD_CRO_OPTIMIZE) == 0) - kmem_free(userdata, usersize); - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - if (dep->de_recid == newid) - break; - } - if (dep != NULL) - break; - } - } while (dbp); - - desize = (sizeof (*de32p) - sizeof (de32p->de32_blks)) + - (sizeof (mddb_block_t) * blkcnt); - - /* - * see if a directory block exists which will hold this entry - */ - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - used = sizeof (*db32p); - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - used += sizeof (*de32p) - sizeof (de32p->de32_blks); - used += sizeof (mddb_block_t) * dep->de_blkcount; - } - if ((used + desize) < MDDB_BSIZE) - break; - } - if (dbp) { - kmem_free((caddr_t)newdbp, sizeof (*newdbp)); - if (blkcnt > s->s_freeblkcnt) { - kmem_free((caddr_t)rbp, ((size_t)recsize)); - if ((options & MD_CRO_OPTIMIZE) == 0) - kmem_free(userdata, usersize); - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NOSPACE); - } - prevdbp = NULL; - } else { - /* - * need to add directory block - */ - if ((blkcnt + 1) > s->s_freeblkcnt) { - kmem_free((caddr_t)newdbp, sizeof (*newdbp)); - kmem_free((caddr_t)rbp, ((size_t)recsize)); - if ((options & MD_CRO_OPTIMIZE) == 0) - kmem_free(userdata, usersize); - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NOSPACE); - } - for (dbp = s->s_dbp; dbp->db_next; dbp = dbp->db_next) - ; - dbp->db_next = newdbp; - bzero((caddr_t)dbp->db_next, sizeof (*newdbp)); - dbp->db_nextblk = getfreeblks(s, 1); - dbp->db_next->db_blknum = dbp->db_nextblk; - prevdbp = dbp; - dbp = dbp->db_next; - dbp->db_nextblk = 0; - dbp->db_firstentry = NULL; - dbp->db_recsum = 0; - dbp->db_magic = MDDB_MAGIC_DB; - } - /* - * ready to add record - */ - desize_ic = (sizeof (*dep) - sizeof (dep->de_blks)) + - (sizeof (mddb_block_t) * blkcnt); - if (dbp->db_firstentry) { - for (dep = dbp->db_firstentry; dep->de_next; dep = dep->de_next) - ; - dep->de_next = (mddb_de_ic_t *)kmem_zalloc(desize_ic, KM_SLEEP); - dep = dep->de_next; - } else { - dep = (mddb_de_ic_t *)kmem_zalloc(desize_ic, KM_SLEEP); - dbp->db_firstentry = dep; - } - bzero((caddr_t)dep, desize_ic); - dep->de_recid = newid; - /* - * Optimized records have an owner node associated with them in - * a MN diskset. The owner is only set on a node that is actively - * writing to that record. The other nodes will show that record - * as having an invalid owner. The owner for an optimized record - * is used during fixoptrecord to determine which node should - * write out the record when the replicas associated with that - * optimized record have been changed. - */ - if (MD_MNSET_SETNO(s->s_setno)) { - dep->de_owner_nodeid = MD_MN_INVALID_NID; - } - dep->de_type1 = type; - dep->de_type2 = type2; - dep->de_reqsize = usersize; - dep->de_recsize = recsize; - dep->de_blkcount = blkcnt; - flag_type = options & - (MD_CRO_OPTIMIZE | MD_CRO_STRIPE | MD_CRO_MIRROR | MD_CRO_RAID | - MD_CRO_SOFTPART | MD_CRO_TRANS_MASTER | MD_CRO_TRANS_LOG | - MD_CRO_HOTSPARE | MD_CRO_HOTSPARE_POOL | MD_CRO_CHANGELOG); - switch (flag_type) { - case MD_CRO_OPTIMIZE: - dep->de_flags = MDDB_F_OPT; - getoptdev(s, dep, 0); - getoptdev(s, dep, 1); - break; - case MD_CRO_STRIPE: - dep->de_flags = MDDB_F_STRIPE; - break; - case MD_CRO_MIRROR: - dep->de_flags = MDDB_F_MIRROR; - break; - case MD_CRO_RAID: - dep->de_flags = MDDB_F_RAID; - break; - case MD_CRO_SOFTPART: - dep->de_flags = MDDB_F_SOFTPART; - break; - case MD_CRO_TRANS_MASTER: - dep->de_flags = MDDB_F_TRANS_MASTER; - break; - case MD_CRO_TRANS_LOG: - dep->de_flags = MDDB_F_TRANS_LOG; - break; - case MD_CRO_HOTSPARE: - dep->de_flags = MDDB_F_HOTSPARE; - break; - case MD_CRO_HOTSPARE_POOL: - dep->de_flags = MDDB_F_HOTSPARE_POOL; - break; - case MD_CRO_CHANGELOG: - dep->de_flags = MDDB_F_CHANGELOG; - break; - } - /* - * try to get all blocks consecutive. If not possible - * just get them one at a time - */ - dep->de_blks[0] = getfreeblks(s, blkcnt); - if (dep->de_blks[0]) { - for (i = 1; i < blkcnt; i++) - dep->de_blks[i] = dep->de_blks[0] + i; - } else { - for (i = 0; i < blkcnt; i++) - dep->de_blks[i] = getfreeblks(s, 1); - } - dep->de_rb = rbp; - bzero((caddr_t)rbp, recsize); - rbp->rb_magic = MDDB_MAGIC_RB; - - /* Do we have to create an old style (32 bit) record? */ - if (options & MD_CRO_32BIT) { - if (options & MD_CRO_FN) - rbp->rb_revision = MDDB_REV_RBFN; - else - rbp->rb_revision = MDDB_REV_RB; - } else { - if (options & MD_CRO_FN) - rbp->rb_revision = MDDB_REV_RB64FN; - else - rbp->rb_revision = MDDB_REV_RB64; - } - - /* set de_rb_userdata for non optimization records */ - if ((options & MD_CRO_OPTIMIZE) == 0) { - dep->de_rb_userdata = userdata; - } - - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - tmppnt = (caddr_t)rbp; - /* - * the following code writes new records to all instances of - * the data base. Writing one block at a time to each instance - * is safe because they are not yet in a directory entry which - * has been written to the data base - */ - err = 0; - if ((options & MD_CRO_OPTIMIZE) == 0) { - for (i = 0; i < blkcnt; i++) { - err |= writeall(s, (caddr_t)tmppnt, - dep->de_blks[i], 1, 0); - tmppnt += MDDB_BSIZE; - } - } else { - if ((MD_MNSET_SETNO(s->s_setno)) && - md_set[s->s_setno].s_am_i_master) { - /* - * If a MN diskset then only master writes out newly - * created optimized record. - */ - err |= writeoptrecord(s, dep); - } - } - uniqtime32(&dbp->db_timestamp); - dbp->db_revision = MDDB_REV_DB; - /* Don't include opt resync and change log records in global XOR */ - if (!(dep->de_flags & MDDB_F_OPT) && - !(dep->de_flags & MDDB_F_CHANGELOG)) - dbp->db_recsum ^= rbp->rb_checksum; - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, KM_SLEEP); - create_db32rec(db32p, dbp); - crcgen(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL); - err |= writeall(s, (caddr_t)db32p, dbp->db_blknum, 1, 0); - if (prevdbp) { - dbp = prevdbp; - uniqtime32(&dbp->db_timestamp); - dbp->db_revision = MDDB_REV_DB; - create_db32rec(db32p, dbp); - crcgen(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL); - err |= writeall(s, (caddr_t)db32p, dbp->db_blknum, 1, 0); - } - kmem_free((caddr_t)db32p, MDDB_BSIZE); - if (err) { - if (writeretry(s)) { - s->s_zombie = newid; - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - } - single_thread_end(s); - mddb_setexit(s); - - ASSERT((newid & MDDB_SETMASK) == 0); - return (MAKERECID(setno, newid)); -} - -int -mddb_deleterec( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_db32_t *db32p; - mddb_de_ic_t *dep, *dep1; - int i; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_db_t) == sizeof (mddb_db32_t)); - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - s = mddb_setenter(DBSET(id), MDDB_NOINIT, NULL); - ASSERT(s != NULL); - - id = DBID(id); - if (checkstate(s, MDDB_PROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - - ASSERT(s->s_lbp != NULL); - single_thread_start(s); - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - dep1 = NULL; - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if (dep->de_recid == id) - break; - dep1 = dep; - } - if (dep != NULL) - break; - } - /* - * no such record - */ - if (dep == NULL) { - single_thread_end(s); - ASSERT(s->s_staledeletes != 0); - s->s_staledeletes--; - mddb_setexit(s); - return (0); - } - - if (!(dep->de_flags & MDDB_F_OPT) && - !(dep->de_flags & MDDB_F_CHANGELOG)) { - dbp->db_recsum ^= dep->de_rb->rb_checksum; - dbp->db_recsum ^= dep->de_rb->rb_checksum_fiddle; - } - - if (dep->de_rb_userdata != NULL) { - if (dep->de_icreqsize) - kmem_free(dep->de_rb_userdata_ic, dep->de_icreqsize); - else - kmem_free(dep->de_rb_userdata, dep->de_reqsize); - } - - kmem_free((caddr_t)dep->de_rb, dep->de_recsize); - - for (i = 0; i < dep->de_blkcount; i++) - blkfree(s, dep->de_blks[i]); - if (dep1) - dep1->de_next = dep->de_next; - else - dbp->db_firstentry = dep->de_next; - - kmem_free(dep, sizeofde(dep)); - - uniqtime32(&dbp->db_timestamp); - dbp->db_revision = MDDB_REV_DB; - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, KM_SLEEP); - create_db32rec(db32p, dbp); - crcgen(db32p, &db32p->db32_checksum, MDDB_BSIZE, NULL); - if (writeall(s, (caddr_t)db32p, dbp->db_blknum, 1, 0)) { - if (writeretry(s)) { - /* - * staledelete is used to mark deletes which failed. - * its only use is to not panic when the user retries - * the delete once the database is active again - */ - single_thread_end(s); - s->s_staledeletes++; - kmem_free((caddr_t)db32p, MDDB_BSIZE); - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - } - single_thread_end(s); - kmem_free((caddr_t)db32p, MDDB_BSIZE); - mddb_setexit(s); - return (0); -} - -mddb_recid_t -mddb_getnextrec( - mddb_recid_t id, - mddb_type_t typ, - uint_t type2 -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int searching, err; - set_t setno; - - setno = DBSET(id); - id = DBID(id); - searching = id; - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (searching) { - if (dep->de_recid == id) - searching = 0; - } else { - if ((typ == MDDB_ALL || dep->de_type1 == typ) && - (type2 == 0 || dep->de_type2 == type2)) { - id = dep->de_recid; - mddb_setexit(s); - ASSERT((id & MDDB_SETMASK) == 0); - return (MAKERECID(setno, id)); - } - } - } - } - - mddb_setexit(s); - - if (searching) - return (MDDB_E_NORECORD); - return (0); -} - -void * -mddb_getrecaddr( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - void *rval; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, NULL)) == NULL) - return (NULL); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - if (dep->de_rb_userdata) - rval = (void *)dep->de_rb_userdata; - else - rval = (void *)dep->de_rb->rb_data; - mddb_setexit(s); - return (rval); - } - } - - mddb_setexit(s); - return (NULL); -} - - -mddb_de_ic_t * -mddb_getrecdep( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, NULL)) == NULL) - return (NULL); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - mddb_setexit(s); - return (dep); - } - } - - mddb_setexit(s); - return (NULL); -} - -void * -mddb_getrecaddr_resize( - mddb_recid_t id, - size_t icsize, - off_t off -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - void *rval = NULL; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, NULL)) == NULL) - return (NULL); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - if (dep->de_rb_userdata) - rval = (void *)dep->de_rb_userdata; - else - rval = (void *)dep->de_rb->rb_data; - break; - } - if (rval != NULL) - break; - } - - if (rval == NULL) { - mddb_setexit(s); - return (NULL); - } - - if (dep->de_rb_userdata) { - caddr_t nud; - - if (dep->de_icreqsize || (dep->de_reqsize >= icsize)) { - mddb_setexit(s); - return (rval); - } - ASSERT((dep->de_reqsize + off) <= icsize); - nud = kmem_zalloc(icsize, KM_SLEEP); - bcopy(dep->de_rb_userdata, nud + off, dep->de_reqsize); - kmem_free(dep->de_rb_userdata, dep->de_reqsize); - dep->de_rb_userdata = nud + off; - dep->de_rb_userdata_ic = nud; - dep->de_icreqsize = icsize; - rval = nud; - } else { - size_t recsize; - /* LINTED variable unused - used for sizeof calculations */ - mddb_rb32_t *nrbp; - - recsize = roundup((sizeof (*nrbp) - sizeof (nrbp->rb_data)) + - icsize, MDDB_BSIZE); - if (dep->de_recsize < recsize) - cmn_err(CE_PANIC, "mddb_getrecaddr_resize: only " - "nonoptimized records can be resized\n"); - } - - mddb_setexit(s); - return (rval); -} - -int -mddb_getrecprivate( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int err = 0; - int private; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - private = (int)dep->de_rb->rb_private; - mddb_setexit(s); - return (private); - } - } - - mddb_setexit(s); - return (MDDB_E_NORECORD); -} - -void -mddb_setrecprivate( - mddb_recid_t id, - uint_t private -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, NULL)) == NULL) { - ASSERT(0); - return; - } - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - dep->de_rb->rb_private = private; - mddb_setexit(s); - return; - } - } - - mddb_setexit(s); - ASSERT(0); -} - -mddb_type_t -mddb_getrectype1( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int err = 0; - mddb_type_t rval; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - rval = dep->de_type1; - mddb_setexit(s); - return (rval); - } - } - - mddb_setexit(s); - return (MDDB_E_NORECORD); -} - -int -mddb_getrectype2( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int err = 0; - int rval; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - rval = (int)dep->de_type2; - mddb_setexit(s); - return (rval); - } - } - - mddb_setexit(s); - return (MDDB_E_NORECORD); -} - -int -mddb_getrecsize( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int err = 0; - int rval; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, &err)) == NULL) - return (err); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - rval = (int)dep->de_reqsize; - mddb_setexit(s); - return (rval); - } - } - - mddb_setexit(s); - return (MDDB_E_NORECORD); -} - - -mddb_recstatus_t -mddb_getrecstatus( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int err = 0; - mddb_recstatus_t e_err; - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, &err)) == NULL) - return ((mddb_recstatus_t)err); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid == id) - break; - } - if (dep) - break; - } - - e_err = MDDB_OK; - - if (! dep) - e_err = MDDB_NORECORD; - else if (! dep->de_rb->rb_commitcnt) - e_err = MDDB_NODATA; - else if (md_get_setstatus(s->s_setno) & MD_SET_STALE) - e_err = MDDB_STALE; - - mddb_setexit(s); - return (e_err); -} - -static int mddb_commitrec_retries = 5; - -/* - * Commit given record to disk. - * If committing an optimized record, do not call - * with md ioctl lock held. - */ -int -mddb_commitrec( - mddb_recid_t id -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - mddb_recid_t ids[2]; - mddb_rb32_t *rbp; - static int err = 0; - md_mn_msg_mddb_optrecerr_t *msg_recerr; - md_mn_kresult_t *kres; - mddb_lb_t *lbp; - mddb_mnlb_t *mnlbp; - mddb_locator_t *lp; - mddb_mnsidelocator_t *mnslp; - mddb_drvnm_t *dn; - int li; - md_replica_recerr_t *recerr; - int i, j; - int rval; - int hit_err = 0; - int retry = mddb_commitrec_retries; - int gave_up = 0; - - s = mddb_setenter(DBSET(id), MDDB_NOINIT, NULL); - ASSERT(s != NULL); - - if (checkstate(s, MDDB_PROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - - if (DBID(id) == 0) { - mddb_setexit(s); - return (0); - } - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if (dep->de_recid == DBID(id)) - break; - } - if (dep) - break; - } - - if (dep == NULL) { - mddb_setexit(s); - return (MDDB_E_NORECORD); - } - - if (! (dep->de_flags & MDDB_F_OPT)) { - ids[0] = id; - ids[1] = 0; - mddb_setexit(s); - return (mddb_commitrecs(ids)); - } - - /* - * following code allows multiple processes to be doing - * optimization commits in parallel. - * NOTE: if lots of optimization commits then the lock - * will not get released until it winds down - */ - if (s->s_optwaiterr) { - while (s->s_optwaiterr) { - s->s_opthungerr = 1; - cv_wait(&s->s_opthungerr_cv, SETMUTEX(s->s_setno)); - } - if (checkstate(s, MDDB_PROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - } - if (s->s_optcmtcnt++ == 0) { - single_thread_start(s); - s->s_opthavelck = 1; - if (s->s_optwantlck) { - cv_broadcast(&s->s_optwantlck_cv); - s->s_optwantlck = 0; - } - } else { - while (! s->s_opthavelck) { - s->s_optwantlck = 1; - cv_wait(&s->s_optwantlck_cv, SETMUTEX(s->s_setno)); - } - } - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; dep = dep->de_next) { - if (dep->de_recid == DBID(id)) - break; - } - if (dep) - break; - } - - if (dep == NULL) { - if (! (--s->s_optcmtcnt)) { - single_thread_end(s); - s->s_opthavelck = 0; - } - mddb_setexit(s); - return (MDDB_E_NORECORD); - } - - rbp = dep->de_rb; - rbp->rb_commitcnt++; - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - - if (writeoptrecord(s, dep)) { - if (MD_MNSET_SETNO(s->s_setno)) { - hit_err = 1; - } - s->s_optwaiterr++; - } - if (MD_MNSET_SETNO(s->s_setno)) { - /* If last thread out, release single_thread_start */ - if (! (--s->s_optcmtcnt)) { - single_thread_end(s); - s->s_opthavelck = 0; - } - /* - * If this thread had a writeoptrecords failure, then - * need to send message to master. - * But, multiple threads could all be running on the - * same single_thread_start, so serialize the threads - * by making each thread grab single_thread_start. - * - * After return from sending message to master message, - * replicas associated with optimized record will havei - * been changed (via a callback from the master to all - * nodes), so retry call to writeoptrecord. - * This code is replacing the call to writeretry that - * occurs for the local and traditional disksets. - */ - if (hit_err) { - single_thread_start(s); - /* - * If > 50% of replicas are alive then continue - * to send message to master until writeoptrecord - * succeeds. For now, assume that minor name, - * major number on this node is the same as on - * the master node. Once devids are turned on - * for MN disksets, can send devid. - */ - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - msg_recerr = kmem_zalloc( - sizeof (md_mn_msg_mddb_optrecerr_t), KM_SLEEP); - while (!(md_get_setstatus(s->s_setno) & - MD_SET_TOOFEW)) { - bzero((caddr_t)msg_recerr, - sizeof (md_mn_msg_mddb_optrecerr_t)); - lbp = s->s_lbp; - mnlbp = (mddb_mnlb_t *)lbp; - for (i = 0; i < 2; i++) { - li = dep->de_optinfo[i].o_li; - lp = &lbp->lb_locators[li]; - for (j = 0; j < MD_MNMAXSIDES; j++) { - mnslp = - &mnlbp-> - lb_mnsidelocators[j][li]; - if (mnslp->mnl_sideno == - s->s_sideno) - break; - } - if (j == MD_MNMAXSIDES) - continue; - - dn = &lbp-> - lb_drvnm[mnslp->mnl_drvnm_index]; - recerr = &msg_recerr->msg_recerr[i]; - recerr->r_li = li; - recerr->r_flags = - dep->de_optinfo[i].o_flags; - recerr->r_blkno = lp->l_blkno; - recerr->r_mnum = md_getminor(lp->l_dev); - (void) strncpy(recerr->r_driver_name, - dn->dn_data, MD_MAXDRVNM); - } - - /* Release locks */ - single_thread_end(s); - mutex_exit(SETMUTEX(s->s_setno)); - - /* - * Send message to master about optimized - * record failure. After return, master - * should have marked failed replicas - * and sent parse message to slaves causing - * slaves to have fixed up the optimized - * record. - * On return from ksend_message, retry - * the write since this node should have fixed - * the optimized resync records it owns. - */ - rval = mdmn_ksend_message(s->s_setno, - MD_MN_MSG_MDDB_OPTRECERR, - MD_MSGF_NO_BCAST, 0, - (char *)msg_recerr, - sizeof (md_mn_msg_mddb_optrecerr_t), - kres); - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - cmn_err(CE_WARN, "mddb_commitrec: " - "Unable to send optimized " - "resync record failure " - "message to other nodes in " - "diskset %s\n", s->s_setname); - mdmn_ksend_show_error(rval, kres, - "MD_MN_MSG_MDDB_OPTRECERR"); - } - - /* Regrab locks */ - mutex_enter(SETMUTEX(s->s_setno)); - single_thread_start(s); - - /* Start over in case mddb changed */ - for (dbp = s->s_dbp; dbp != NULL; - dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - if (dep->de_recid == DBID(id)) - break; - } - if (dep) - break; - } - if (dep) { - rbp = dep->de_rb; - rbp->rb_commitcnt++; - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - - /* - * If writeoptrecord succeeds, then - * break out. - */ - if (!(writeoptrecord(s, dep))) - break; - } - if (--retry == 0) { - cmn_err(CE_WARN, "mddb_commitrec: " - "giving up writing optimized " - "resync record for " - "diskset %s, device %s,%d " - "blkno 0x%x, flags 0x%x\n", - s->s_setname, recerr->r_driver_name, - recerr->r_mnum, recerr->r_blkno, - recerr->r_flags); - gave_up++; - break; - } - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - kmem_free(msg_recerr, - sizeof (md_mn_msg_mddb_optrecerr_t)); - - /* Resync record should be fixed - if possible */ - s->s_optwaiterr--; - if (s->s_optwaiterr == 0) { - /* All errors have been handled */ - if (s->s_opthungerr) { - s->s_opthungerr = 0; - cv_broadcast(&s->s_opthungerr_cv); - } - } - single_thread_end(s); - mddb_setexit(s); - if (md_get_setstatus(s->s_setno) & MD_SET_TOOFEW) { - return (MDDB_E_NOTNOW); - } else if (gave_up) { - return (MDDB_E_STALE); - } else { - return (0); - } - } - } else { - /* If set is a traditional or local set */ - if (! (--s->s_optcmtcnt)) { - err = 0; - if (s->s_optwaiterr) { - err = writeretry(s); - s->s_optwaiterr = 0; - if (s->s_opthungerr) { - s->s_opthungerr = 0; - cv_broadcast(&s->s_opthungerr_cv); - } - } - single_thread_end(s); - s->s_opthavelck = 0; - mddb_setexit(s); - if (err) - return (MDDB_E_NOTNOW); - return (0); - } - if (s->s_optwaiterr) { - while (s->s_optwaiterr) { - s->s_opthungerr = 1; - cv_wait(&s->s_opthungerr_cv, - SETMUTEX(s->s_setno)); - } - if (checkstate(s, MDDB_NOPROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - } - } - - mddb_setexit(s); - return (0); -} - -int -mddb_commitrecs( - mddb_recid_t ids[] -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - mddb_rb32_t *saverbp; - mddb_lb_t *lbp; - int li; - uint_t checksum; - mddb_recid_t *idp; - int err = 0; - set_t setno; - - if (panicstr) - cmn_err(CE_PANIC, "md: mddb: commit not allowed"); - - /* - * scan through and make sure ids are from the same set - */ - setno = DBSET(ids[0]); - for (idp = ids; *idp != NULL; idp++) - ASSERT(DBSET(*idp) == setno); - - s = mddb_setenter(setno, MDDB_MUSTEXIST, NULL); - - if (checkstate(s, MDDB_PROBE)) { - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - - ASSERT(s->s_lbp != NULL); - err = 0; - - if (! ids[0]) { - mddb_setexit(s); - return (0); - } - - single_thread_start(s); - /* - * scan through and make sure ids all exist - */ - for (idp = ids; *idp != NULL; idp++) { - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - if (dep->de_recid == DBID(*idp)) - break; - } - if (dep != NULL) - break; - } - if (dep == NULL) { - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NORECORD); - } - } - - /* - * scan through records fix commit counts and - * zero fiddles and update time stamp and rechecksum record - */ - checksum = 0; - idp = ids; - saverbp = NULL; - while (*idp) { - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - if (dep->de_recid == DBID(*idp)) - break; - } - if (dep != NULL) - break; - } - rbp = dep->de_rb; - ASSERT(! (dep->de_flags & MDDB_F_OPT)); - - getuserdata(setno, dep); - /* Don't do fiddles for CHANGE LOG records */ - if (!(dep->de_flags & MDDB_F_CHANGELOG)) { - checksum ^= rbp->rb_checksum_fiddle; - rbp->rb_checksum_fiddle = 0; - checksum ^= rbp->rb_checksum; - saverbp = rbp; - } - rbp->rb_commitcnt++; - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - - /* Don't do fiddles for CHANGE LOG records */ - if (!(dep->de_flags & MDDB_F_CHANGELOG)) { - checksum ^= rbp->rb_checksum; - } - idp++; - } - - if (saverbp) - saverbp->rb_checksum_fiddle = checksum; - - /* - * If this is a MN set but we are not the master, then we are not - * supposed to update the mddb on disk. So we finish at this point. - */ - if ((setno != MD_LOCAL_SET) && (s->s_lbp->lb_flags & MDDB_MNSET) && - (md_set[setno].s_am_i_master == 0)) { - single_thread_end(s); - mddb_setexit(s); - return (0); - } - - lbp = s->s_lbp; - for (li = 0; li < lbp->lb_loccnt; li++) { - if (! (lbp->lb_locators[li].l_flags & MDDB_F_ACTIVE)) - continue; - - idp = ids; - while (*idp) { - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - dep = dbp->db_firstentry; - while (dep && (dep->de_recid != DBID(*idp))) - dep = dep->de_next; - if (dep != NULL) - break; - } - rbp = dep->de_rb; - err = wrtblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, (mddb_bf_t **)0, - MDDB_WR_ONLY_MASTER); - if (err) - break; - idp++; - } - if (err) - break; - } - if (err) { - if (writeretry(s)) { - single_thread_end(s); - mddb_setexit(s); - return (MDDB_E_NOTNOW); - } - } - single_thread_end(s); - mddb_setexit(s); - return (0); -} - -mddb_recid_t -mddb_makerecid( - set_t setno, - mddb_recid_t id -) -{ - return (MAKERECID(setno, id)); -} - -set_t -mddb_getsetnum( - mddb_recid_t id -) -{ - return (DBSET(id)); -} - -char * -mddb_getsetname( - set_t setno -) -{ - return (((mddb_set_t *)md_set[setno].s_db)->s_setname); -} - -side_t -mddb_getsidenum( - set_t setno -) -{ - if (md_set[setno].s_db) - return (((mddb_set_t *)md_set[setno].s_db)->s_sideno); - return (0); -} - -int -mddb_ownset( - set_t setno -) -{ - if ((md_get_setstatus(setno) & MD_SET_TAGDATA) && md_set[setno].s_db) - return (1); - - if (md_set[setno].s_db && ((mddb_set_t *)md_set[setno].s_db)->s_lbp) - return (1); - - return (0); -} - -/*ARGSUSED*/ -int -getmed_ioctl(mddb_med_parm_t *medpp, int mode) -{ - mddb_set_t *s; - int err = 0; - set_t setno = medpp->med_setno; - md_error_t *ep = &medpp->med_mde; - - mdclrerror(ep); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) - return (mdmddberror(ep, MDE_DB_NOTOWNER, NODEV32, setno)); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - medpp->med = s->s_med; /* structure assignment */ - - mddb_setexit(s); - - return (0); -} - -int -setmed_ioctl(mddb_med_parm_t *medpp, int mode) -{ - - mddb_set_t *s; - int err = 0; - set_t setno = medpp->med_setno; - md_error_t *ep = &medpp->med_mde; - - mdclrerror(ep); - - if ((mode & FWRITE) == 0) - return (mdsyserror(ep, EACCES)); - - /* - * This should be the only thing that prevents LOCAL sets from having - * mediators, at least in the kernel, userland needs to have some code - * written. - */ - if (setno == MD_LOCAL_SET) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) - return (mdmddberror(ep, MDE_DB_NOTOWNER, NODEV32, setno)); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - s->s_med = medpp->med; /* structure assignment */ - - mddb_setexit(s); - - return (0); -} - -int -updmed_ioctl(mddb_med_upd_parm_t *medpp, int mode) -{ - - mddb_set_t *s; - int err = 0; - set_t setno = medpp->med_setno; - md_error_t *ep = &medpp->med_mde; - - mdclrerror(ep); - - if ((mode & FWRITE) == 0) - return (mdsyserror(ep, EACCES)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) - return (mdmddberror(ep, MDE_DB_NOTOWNER, NODEV32, setno)); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - single_thread_start(s); - (void) upd_med(s, "updmed_ioctl()"); - single_thread_end(s); - - mddb_setexit(s); - - return (0); -} - -int -take_set(mddb_config_t *cp, int mode) -{ - int err = 0; - mddb_med_upd_parm_t medup; - set_t setno = cp->c_setno; - md_error_t *ep = &cp->c_mde; - int snarf_ok = 0; - - if (md_get_setstatus(setno) & MD_SET_SNARFED) - return (0); - - err = mddb_configure(MDDB_GETDEV, cp); - if (! err && mdisok(ep)) { - if (md_snarf_db_set(setno, ep) != 0) - goto out; - snarf_ok = 1; - } - - /* - * Clear replicated import flag since this is - * used during the take of a diskset with - * previously unresolved replicated disks. - */ - if (md_get_setstatus(setno) & - MD_SET_REPLICATED_IMPORT) { - md_clr_setstatus(setno, MD_SET_REPLICATED_IMPORT); - } - - if (! err && mdisok(ep)) { - if (! cp->c_flags) { - medup.med_setno = setno; - mdclrerror(&medup.med_mde); - - err = updmed_ioctl(&medup, mode); - if (! mdisok(&medup.med_mde)) - (void) mdstealerror(ep, &medup.med_mde); - } - } - -out: - /* - * In the case that the snarf failed, the diskset is - * left with s_db set, but s_lbp not set. The node is not - * an owner of the set and won't be allowed to release the - * diskset in order to cleanup. With s_db set, any call to the - * GETDEV or ENDDEV ioctl (done by libmeta routine metareplicalist) - * will cause the diskset to be loaded. So, cleanup the diskset so - * that an inadvertent start of the diskset doesn't happen later. - */ - if ((snarf_ok == 0) && md_set[setno].s_db && - (((mddb_set_t *)md_set[setno].s_db)->s_lbp == 0)) { - mutex_enter(&mddb_lock); - mddb_unload_set(setno); - mutex_exit(&mddb_lock); - } - return (err); -} - -/*ARGSUSED*/ -int -release_set(mddb_config_t *cp, int mode) -{ - int err = 0; - set_t setno = cp->c_setno; - md_error_t *ep = &cp->c_mde; - - /* - * Data integrity check - */ - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - md_haltsnarf_enter(setno); - /* - * Attempt to mark set as HOLD. If it is marked as HOLD, this means - * that the mirror code is currently searching all mirrors for a - * errored component that needs a hotspare. While this search is in - * progress, we cannot release the set and thgerefore we return EBUSY. - * Once we have set HOLD, the mirror function (check_4_hotspares) will - * block before the search until the set is released. - */ - if (md_holdset_testandenter(setno) != 0) { - md_haltsnarf_exit(setno); - rw_exit(&md_unit_array_rw.lock); - return (EBUSY); - } - - if ((err = md_halt_set(setno, MD_HALT_ALL)) == 0) - err = mddb_configure(MDDB_RELEASESET, cp); - - md_holdset_exit(setno); - md_haltsnarf_exit(setno); - rw_exit(&md_unit_array_rw.lock); - - if (! err && mdisok(ep)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RELEASE, SVM_TAG_SET, setno, - NODEV64); - } - - return (err); -} - -int -gettag_ioctl(mddb_dtag_get_parm_t *dtgpp, int mode) -{ - mddb_set_t *s; - int err = 0; - mddb_dtag_lst_t *dtlp; - set_t setno = dtgpp->dtgp_setno; - md_error_t *ep = &dtgpp->dtgp_mde; - - mdclrerror(ep); - - if ((mode & FREAD) == 0) - return (mdsyserror(ep, EACCES)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((s = mddb_setenter(setno, MDDB_NOINIT, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - /* - * Data tags not supported on MN sets so return invalid operation. - * This ioctl could be called before the mddb has been read in so - * the set status may not yet be set to MNSET, so code following - * this check must handle a MN diskset properly. - */ - if (md_get_setstatus(setno) & MD_SET_MNSET) { - mddb_setexit(s); - return (mderror(ep, MDE_INVAL_MNOP)); - } - - /* s_dtlp is NULL for MN diskset */ - dtlp = s->s_dtlp; - while (dtlp != NULL) { - if (dtgpp->dtgp_dt.dt_id == 0 || - dtgpp->dtgp_dt.dt_id == dtlp->dtl_dt.dt_id) { - bcopy((caddr_t)&dtlp->dtl_dt, (caddr_t)&dtgpp->dtgp_dt, - sizeof (mddb_dtag_t)); - break; - } - dtlp = dtlp->dtl_nx; - } - - /* Walked the whole list and id not found, return error */ - if (dtlp == (mddb_dtag_lst_t *)NULL) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTAG, NODEV32, setno)); - } - - mddb_setexit(s); - - return (0); -} - -int -usetag_ioctl(mddb_dtag_use_parm_t *dtupp, int mode) -{ - mddb_set_t *s; - int err = 0; - mddb_config_t *cp; - mddb_ri_t *trip = NULL; - mddb_dtag_t *dtagp = NULL; - set_t setno = dtupp->dtup_setno; - md_error_t *ep = &dtupp->dtup_mde; - - mdclrerror(ep); - - if ((mode & FWRITE) == 0) - return (mdsyserror(ep, EACCES)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (dtupp->dtup_id < 0) - return (mdsyserror(ep, EINVAL)); - else if (dtupp->dtup_id == 0) - return (mdmddberror(ep, MDE_DB_NOTAG, NODEV32, setno)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_TAGDATA) == 0) - return (mdmddberror(ep, MDE_DB_NTAGDATA, NODEV32, setno)); - - if ((s = mddb_setenter(setno, MDDB_NOINIT, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - /* - * Data tags not supported on MN sets so return invalid operation. - * This ioctl could be called before the mddb has been read in so - * the set status may not yet be set to MNSET, so code following - * this check must handle a MN diskset properly. - */ - if (md_get_setstatus(setno) & MD_SET_MNSET) { - mddb_setexit(s); - return (mderror(ep, MDE_INVAL_MNOP)); - } - - /* Validate and find the id requested - nothing found if MN diskset */ - if ((dtagp = dtl_findl(s, dtupp->dtup_id)) == NULL) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NOTAG, NODEV32, setno)); - } - - /* Usetag is only valid when more than one tag exists */ - if (dtl_cntl(s) < 2) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_NTAGDATA, NODEV32, setno)); - } - - /* Put the selected tag in place */ - dt_setup(s, dtagp); - - cp = kmem_zalloc(sizeof (mddb_config_t), KM_SLEEP); - - /* Save the hint information */ - trip = save_rip(s); - - cp->c_timestamp = s->s_ident.createtime; /* struct assignment */ - cp->c_setno = setno; - cp->c_sideno = s->s_sideno; - (void) strncpy(cp->c_setname, s->s_setname, MD_MAX_SETNAME); - cp->c_setname[MD_MAX_SETNAME] = '\0'; - cp->c_med = s->s_med; /* struct assignment */ - - mddb_setexit(s); - - s = NULL; - - /* shorthand */ - setno = cp->c_setno; - - /* Let unload know not to free the tag */ - md_set_setstatus(setno, MD_SET_KEEPTAG); - - /* Release the set */ - if (err = release_set(cp, mode)) - goto out; - - if (! mdisok(&cp->c_mde)) { - (void) mdstealerror(ep, &cp->c_mde); - err = 1; - goto out; - } - - /* Re-init set using the saved mddb_config_t structure */ - if ((s = mddb_setenter(setno, MDDB_NOINIT, &err)) == NULL) { - if ((s = init_set(cp, MDDB_NOINIT, &err)) == NULL) { - err = mddbstatus2error(ep, err, NODEV32, setno); - goto out; - } - } - - ASSERT(s->s_rip == (mddb_ri_t *)NULL); - - /* use the saved rip structure */ - s->s_rip = trip; - trip = (mddb_ri_t *)NULL; - - /* Let the take code know a tag is being used */ - md_set_setstatus(setno, MD_SET_USETAG); - - mddb_setexit(s); - - s = NULL; - - /* Take the set */ - if (err = take_set(cp, mode)) - goto out; - - if (! mdisok(&cp->c_mde)) - (void) mdstealerror(ep, &cp->c_mde); - -out: - md_clr_setstatus(setno, (MD_SET_USETAG | MD_SET_KEEPTAG)); - - kmem_free(cp, sizeof (mddb_config_t)); - - if (trip) - free_rip(&trip); - - if (s) - mddb_setexit(s); - - return (err); -} - -int -accept_ioctl(mddb_accept_parm_t *accpp, int mode) -{ - mddb_set_t *s; - int err = 0; - mddb_config_t *cp; - mddb_ri_t *trip = NULL; - set_t setno = accpp->accp_setno; - md_error_t *ep = &accpp->accp_mde; - - mdclrerror(ep); - - if ((mode & FWRITE) == 0) - return (mdsyserror(ep, EACCES)); - - if (setno >= md_nsets) - return (mdmderror(ep, MDE_INVAL_UNIT, MD_ADM_MINOR)); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((md_get_setstatus(setno) & MD_SET_ACCOK) == 0) - return (mdmddberror(ep, MDE_DB_ACCNOTOK, NODEV32, setno)); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - /* - * Data tags not supported on MN sets so return invalid operation. - * mddb is guaranteed to be incore at this point, so this - * check will catch all MN disksets. - */ - if (md_get_setstatus(setno) & MD_SET_MNSET) { - mddb_setexit(s); - return (mderror(ep, MDE_INVAL_MNOP)); - } - - cp = kmem_zalloc(sizeof (mddb_config_t), KM_SLEEP); - - trip = save_rip(s); - - cp->c_timestamp = s->s_ident.createtime; /* struct assignment */ - cp->c_setno = setno; - cp->c_sideno = s->s_sideno; - (void) strncpy(cp->c_setname, s->s_setname, MD_MAX_SETNAME); - cp->c_setname[MD_MAX_SETNAME] = '\0'; - cp->c_med = s->s_med; /* struct assignment */ - - /* Tag the data */ - if (err = set_dtag(s, ep)) { - err = mdsyserror(ep, err); - goto out; - } - - /* If we had a BADTAG, it will be re-written, so clear the bit. */ - if (md_get_setstatus(setno) & MD_SET_BADTAG) - md_clr_setstatus(setno, MD_SET_BADTAG); - - if (err = dt_write(s)) { - err = mdsyserror(ep, err); - goto out; - } - - mddb_setexit(s); - - s = NULL; - - /* shorthand */ - setno = cp->c_setno; - - /* Clear the keeptag */ - md_clr_setstatus(setno, MD_SET_KEEPTAG); - - /* Release the set */ - if (err = release_set(cp, mode)) - goto out; - - if (! mdisok(&cp->c_mde)) { - (void) mdstealerror(ep, &cp->c_mde); - goto out; - } - - /* Re-init set using the saved mddb_config_t structure */ - if ((s = mddb_setenter(setno, MDDB_NOINIT, &err)) == NULL) { - if ((s = init_set(cp, MDDB_NOINIT, &err)) == NULL) { - err = mddbstatus2error(ep, err, NODEV32, setno); - goto out; - } - } - - ASSERT(s->s_rip == (mddb_ri_t *)NULL); - - /* Free the allocated rip structure */ - if (s->s_rip != (mddb_ri_t *)NULL) - free_rip(&s->s_rip); - - /* use the saved rip structure */ - s->s_rip = trip; - trip = (mddb_ri_t *)NULL; - - /* Let the set init code know an accept is in progress */ - md_set_setstatus(setno, MD_SET_ACCEPT); - - mddb_setexit(s); - - s = NULL; - - /* Take the set */ - if (err = take_set(cp, mode)) - goto out; - - if (! mdisok(&cp->c_mde)) - (void) mdstealerror(ep, &cp->c_mde); - -out: - md_clr_setstatus(setno, (MD_SET_ACCOK | MD_SET_ACCEPT)); - - kmem_free(cp, sizeof (mddb_config_t)); - - if (trip) - free_rip(&trip); - - if (s) - mddb_setexit(s); - - return (err); -} - -/* - * mddb_getinvlb_devid - cycles through the locator block and determines - * if the device id's for any of the replica disks are invalid. - * If so, it returns the diskname in the ctdptr. - * RETURN - * -1 Error - * cnt number of invalid device id's - */ -int -mddb_getinvlb_devid( - set_t setno, - int count, - int size, - char **ctdptr -) -{ - mddb_set_t *s; - int err = 0; - mddb_lb_t *lbp; - int li; - mddb_did_blk_t *did_blk; - mddb_did_info_t *did_info; - int len; - int cnt = 0; - char *cptr; - md_name_suffix *sn; - int i, dont_add_it; - char *tmpctd, *diskname; - char *tmpname; - - cptr = *ctdptr; - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (-1); - } - - single_thread_start(s); - lbp = s->s_lbp; - - if (lbp->lb_setno != setno) { - single_thread_end(s); - mddb_setexit(s); - return (-1); - } - - /* check for lb being devid style */ - if (lbp->lb_flags & MDDB_DEVID_STYLE) { - did_blk = s->s_did_icp->did_ic_blkp; - for (li = 0; li < lbp->lb_loccnt; li++) { - did_info = &(did_blk->blk_info[li]); - /* Only if devid exists and isn't valid */ - if ((did_info->info_flags & MDDB_DID_EXISTS) && - !(did_info->info_flags & MDDB_DID_VALID)) { - /* - * if we count more invalid did's than - * was passed in there's an error somewhere - */ - if (cnt++ > count) { - single_thread_end(s); - mddb_setexit(s); - return (-1); - } - - /* - * Future note: Need to do something here - * for the MN diskset case when device ids - * are supported in disksets. - * Can't add until merging devids_in_diskset - * code into code base. - */ - - sn = &s->s_lnp->ln_suffixes[0][li]; - /* - * check to make sure length of device name is - * not greater than computed first time through - */ - len = sn->suf_len; - if (len > size) { - single_thread_end(s); - mddb_setexit(s); - return (-1); - } - tmpctd = *ctdptr; - /* strip off slice part */ - diskname = md_strdup(sn->suf_data); - tmpname = strrchr(diskname, 's'); - *tmpname = '\0'; - dont_add_it = 0; - /* look to see if diskname is already in list */ - for (i = 0; i < (cnt-1); i++) { - if (strcmp(diskname, tmpctd) == 0) { - /* already there, don't add */ - dont_add_it = 1; - break; - } - /* point to next diskname in list */ - tmpctd += size; - } - if (dont_add_it == 0) { - /* add diskname to list */ - (void) strcpy(cptr, diskname); - cptr += size; - } - kmem_free(diskname, strlen(sn->suf_data) + 1); - } - } - } - /* null terminate the list */ - *cptr = '\0'; - /* - * need to save the new pointer so that calling routine can continue - * to add information onto the end. - */ - *ctdptr = cptr; - single_thread_end(s); - mddb_setexit(s); - return (cnt); -} - -/* - * mddb_validate_lb - count the number of lb's with invalid device id's. Keep - * track of length of longest devicename. - * RETURN - * -1 error - * cnt number of lb's with invalid devid's - */ -int -mddb_validate_lb( - set_t setno, - int *rmaxsz -) -{ - mddb_set_t *s; - int err = 0; - mddb_lb_t *lbp; - int li; - mddb_did_blk_t *did_blk; - mddb_did_info_t *did_info; - int len; - int cnt = 0; - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (-1); - - single_thread_start(s); - lbp = s->s_lbp; - - if (lbp->lb_setno != setno) { - single_thread_end(s); - mddb_setexit(s); - return (-1); - } - - /* lb must be in devid style */ - if ((lbp->lb_flags & MDDB_DEVID_STYLE) == 0) - goto mvl_out; - - did_blk = s->s_did_icp->did_ic_blkp; - for (li = 0; li < lbp->lb_loccnt; li++) { - char *minor_name; - mddb_locator_t *lp; - dev_t ddi_dev; - ddi_devid_t devid; - ddi_devid_t rtn_devid = NULL; - int get_rval; - - did_info = &(did_blk->blk_info[li]); - if (((did_info->info_flags & MDDB_DID_EXISTS) == 0) || - (did_info->info_flags & MDDB_DID_VALID)) - continue; - - /* Here we know, did exists but isn't valid */ - - lp = &lbp->lb_locators[li]; - ddi_dev = expldev(lp->l_dev); - get_rval = mddb_devid_get(s, li, &devid, &minor_name); - ASSERT(get_rval == 1); - if ((ddi_lyr_get_devid(ddi_dev, &rtn_devid) == DDI_SUCCESS) && - (ddi_devid_compare(rtn_devid, devid) == 0)) { - did_info->info_flags = MDDB_DID_VALID | - MDDB_DID_EXISTS | MDDB_DID_UPDATED; - } else { - cnt++; - /* - * Future note: Need to do something here - * for the MN diskset case when device ids - * are supported in disksets. - * Can't add until merging devids_in_diskset - * code into code base. - */ - len = (&s->s_lnp->ln_suffixes[0][li])-> suf_len; - if (*rmaxsz < len) - *rmaxsz = len; - } - if (rtn_devid != NULL) - ddi_devid_free(rtn_devid); - } - -mvl_out: - - if (push_lb(s) != 0) - cnt = -1; - (void) upd_med(s, "mddb_validate_lb(0)"); - single_thread_end(s); - mddb_setexit(s); - return (cnt); -} - -int -check_active_locators() -{ - mddb_set_t *s; - mddb_lb_t *lbp; - int li; - int active = 0; - - mutex_enter(&mddb_lock); - /* there is nothing here..so we can unload */ - if ((mddb_set_t *)md_set[MD_LOCAL_SET].s_db == NULL) { - mutex_exit(&mddb_lock); - return (0); - } - s = (mddb_set_t *)md_set[MD_LOCAL_SET].s_db; - lbp = s->s_lbp; - if (lbp == NULL) { - mutex_exit(&mddb_lock); - return (0); - } - - for (li = 0; li < lbp->lb_loccnt; li++) { - mddb_locator_t *lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_ACTIVE) { - active = 1; - break; - } - } - mutex_exit(&mddb_lock); - return (active); -} - -/* - * regetoptrecord: - * -------------- - * Update the in-core optimized resync record contents by re-reading the - * record from the on-disk metadb. - * The contents of the resync record will be overwritten by calling this - * routine. This means that callers that require the previous contents to - * be preserved must save the data before calling this routine. - * Return values: - * 0 - successfully read in resync record from a mddb - * 1 - failure. Unable to read resync record from either mddb. - */ -static int -regetoptrecord( - mddb_set_t *s, - mddb_de_ic_t *dep -) -{ - mddb_lb_t *lbp; - mddb_locator_t *lp; - mddb_rb32_t *rbp, *crbp; - int li; - int i; - int err = 0; - size_t recsize; - -#if defined(_ILP32) && !defined(lint) - ASSERT(sizeof (mddb_rb_t) == sizeof (mddb_rb32_t)); -#endif - - recsize = dep->de_recsize; - crbp = (mddb_rb32_t *)kmem_zalloc(recsize, KM_SLEEP); - - single_thread_start(s); - rbp = dep->de_rb; - - dep->de_optinfo[0].o_flags |= MDDB_F_EDATA; - dep->de_optinfo[1].o_flags |= MDDB_F_EDATA; - - lbp = s->s_lbp; - - for (i = 0; i < 2; i++) { - if (! (dep->de_optinfo[i].o_flags & MDDB_F_ACTIVE)) - continue; - li = dep->de_optinfo[i].o_li; - lp = &lbp->lb_locators[li]; - - if (! (lp->l_flags & MDDB_F_ACTIVE) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - /* - * re-read the optimized resync record with failfast set - * since a failed disk could lead to a very long wait. - */ - err = readblklst(s, (caddr_t)rbp, dep->de_blks, - dep->de_blkcount, li, B_FAILFAST); - - if (err) - continue; - - if (rbp->rb_magic != MDDB_MAGIC_RB) - continue; - - if (revchk(MDDB_REV_RB, rbp->rb_revision)) - continue; - - /* Check the crc for this record */ - if (rec_crcchk(s, dep, rbp)) { - continue; - } - dep->de_optinfo[i].o_flags = MDDB_F_ACTIVE; - - if (rbp == crbp) { - if (rbp->rb_checksum != crbp->rb_checksum) - dep->de_optinfo[1].o_flags |= MDDB_F_EDATA; - break; - } - rbp = crbp; - } - - single_thread_end(s); - - if (rbp == crbp) { - rbp->rb_private = 0; - kmem_free((caddr_t)crbp, recsize); - return (0); - } - uniqtime32(&rbp->rb_timestamp); - /* Generate the crc for this record */ - rec_crcgen(s, dep, rbp); - kmem_free((caddr_t)crbp, recsize); - return (1); -} - -/* - * mddb_reread_rr: - * Re-read the resync record from the on-disk copy. This is required for - * multi-node support so that a new mirror-owner can determine if a resync - * operation is required to guarantee data integrity. - * - * Arguments: - * setno Associated set - * id Resync record ID - * - * Return Value: - * 0 successful reread - * -1 invalid set (not multi-node or non-existant) - * >0 metadb state invalid, failed to reread - */ -int -mddb_reread_rr( - set_t setno, - mddb_recid_t id -) -{ - mddb_set_t *s; - int err = 0; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - - if (setno >= md_nsets) - return (-1); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) - return (-1); - - if ((setno == MD_LOCAL_SET) || !(s->s_lbp->lb_flags & MDDB_MNSET)) { - mddb_setexit(s); - return (-1); - } - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - dep = dbp->db_firstentry; - while (dep && (dep->de_recid != DBID(id))) - dep = dep->de_next; - if (dep != NULL) - break; - } - - if (dep != NULL) { - err = regetoptrecord(s, dep); - } else { - err = -1; - } - mddb_setexit(s); - return (err); -} - -/* - * Set owner associated with MN optimized resync record. - * - * Optimized records have an owner node associated with them in - * a MN diskset. The owner is only set on a node that is actively - * writing to that record. The other nodes will show that record - * as having an invalid owner. The owner for an optimized record - * is used during fixoptrecord to determine which node should - * write out the record when the replicas associated with that - * optimized record have been changed. - * - * Called directly from mirror driver and not from an ioctl. - * - * Returns - * NULL if successful. - * MDDB_E_NORECORD if record not found. - */ -int -mddb_setowner( - mddb_recid_t id, - md_mn_nodeid_t owner -) -{ - mddb_set_t *s; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int found = 0; - - - if (DBSET(id) >= md_nsets) - return (MDDB_E_NORECORD); - - if ((s = mddb_setenter(DBSET(id), MDDB_MUSTEXIST, NULL)) == NULL) - return (MDDB_E_NORECORD); - - id = DBID(id); - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; - dep != NULL; dep = dep->de_next) { - if (dep->de_recid != id) - continue; - dep->de_owner_nodeid = owner; - found = 1; - break; - } - if (found) - break; - } - - mddb_setexit(s); - - if (!found) { - return (MDDB_E_NORECORD); - } - - return (NULL); -} - -/* - * mddb_parse re-reads portions of the mddb from disk given a list - * of good replicas to read from and flags describing - * which portion of the mddb to read in. - * - * Used in a MN diskset when the master has made a change to some part - * of the mddb and wants to relay this information to the slaves. - */ -int -mddb_parse(mddb_parse_parm_t *mpp) -{ - mddb_set_t *s; - int err = 0; - mddb_locator_t *lp, *old_lp; - mddb_lb_t *lbp, *old_lbp; - int rval = 0; - int i, li; - int found_good_one = 0; - mddb_ln_t *lnp; - mddb_block_t ln_blkcnt; - md_error_t *ep = &mpp->c_mde; - - if (mpp->c_setno >= md_nsets) - return (EINVAL); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((s = mddb_setenter(mpp->c_setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (mddbstatus2error(ep, err, NODEV32, mpp->c_setno)); - } - - if (!(MD_MNSET_SETNO(mpp->c_setno))) { - mddb_setexit_no_parse(s); - return (EINVAL); - } - - /* - * Master node initiated this request, so there's no work for - * the master node to do. - */ - if (md_set[mpp->c_setno].s_am_i_master) { - mddb_setexit_no_parse(s); - return (rval); - } - - single_thread_start(s); - - if (mpp->c_parse_flags & MDDB_PARSE_LOCBLK) { - lbp = 0; - for (i = 0; i < MDDB_NLB; i++) { - /* Walk through master's active list */ - if (!(mpp->c_lb_flags[i] & MDDB_F_ACTIVE)) - continue; - if (s->s_mbiarray[i] == NULL) - continue; - - /* Assumes master blocks are already setup */ - if (lbp == (mddb_lb_t *)NULL) { - lbp = (mddb_lb_t *)kmem_zalloc( - dbtob(MDDB_MNLBCNT), KM_SLEEP); - } - err |= readblks(s, (caddr_t)lbp, 0, lbp->lb_blkcnt, i); - - if (err) - continue; - - if (lbp->lb_magic != MDDB_MAGIC_LB) - continue; - if (lbp->lb_blkcnt != MDDB_MNLBCNT) - continue; - if (revchk(MDDB_REV_MNLB, lbp->lb_revision)) - continue; - if (crcchk(lbp, &lbp->lb_checksum, dbtob(MDDB_MNLBCNT), - NULL)) - continue; - if (lbp->lb_setno != s->s_setno) - continue; - /* - * a commit count of zero means this locator has - * been deleted - */ - if (lbp->lb_commitcnt == 0) { - continue; - } - /* Found a good locator - keep it */ - found_good_one = 1; - break; - } - - /* - * If found a good copy of the mddb, then read it into - * this node's locator block. Fix up the set's s_mbiarray - * pointer (master block incore array pointer) to be - * in sync with the newly read in locator block. If a - * new mddb was added, read in the master blocks associated - * with the new mddb. If an mddb was deleted, free the - * master blocks associated with deleted mddb. - */ - if (found_good_one) { - /* Compare old and new view of mddb locator blocks */ - old_lbp = s->s_lbp; - for (li = 0; li < lbp->lb_loccnt; li++) { - int mn_set; - - lp = &lbp->lb_locators[li]; - old_lp = &old_lbp->lb_locators[li]; - - /* If old and new views match, continue */ - if ((lp->l_flags & MDDB_F_ACTIVE) == - (old_lp->l_flags & MDDB_F_ACTIVE)) - continue; - - if (lp->l_flags & MDDB_F_ACTIVE) { - /* - * If new mddb has been added - delete - * old mbiarray and get new one. - * - * When devids are supported, will - * need to get dev from devid. - */ - if (s->s_mbiarray[li]) { - free_mbipp(&s->s_mbiarray[li]); - } - /* - * If getmasters fails, getmasters - * will set appropriate error flags. - */ - s->s_mbiarray[li] = getmasters(s, - md_expldev(lp->l_dev), lp->l_blkno, - (uint_t *)&(lp->l_flags), &mn_set); - } else if (lp->l_flags & MDDB_F_DELETED) { - /* - * If old one has been deleted - - * delete old mbiarray. - */ - if (s->s_mbiarray[li]) { - free_mbipp(&s->s_mbiarray[li]); - } - } - } - - /* Free this node's old view of mddb locator blocks */ - kmem_free((caddr_t)s->s_lbp, - dbtob(s->s_lbp->lb_blkcnt)); - s->s_lbp = lbp; - } else { - if (lbp) - kmem_free(lbp, dbtob(MDDB_MNLBCNT)); - } - } - - if (mpp->c_parse_flags & MDDB_PARSE_LOCNM) { - lnp = s->s_lnp; - lbp = s->s_lbp; - ln_blkcnt = lbp->lb_lnblkcnt; - s->s_lnp = NULL; /* readlocnames does this anyway */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - /* Successfully read the locator names */ - if (readlocnames(s, li) == 0) - break; - } - - if (li == lbp->lb_loccnt) { - /* Did not successfully read locnames; restore lnp */ - s->s_lnp = lnp; - } else { - /* readlocnames successful, free old struct */ - kmem_free((caddr_t)lnp, dbtob(ln_blkcnt)); - } - } - - if (mpp->c_parse_flags & MDDB_PARSE_OPTRECS) { - mddb_de_ic_t *dep, *tdep, *first_dep, *dep2; - mddb_db_t *dbp; - mddb_db32_t *db32p; - mddb_de32_t *de32p, *de32p2; - int writeout; - - lbp = s->s_lbp; - /* - * Walk through directory block and directory entry incore - * linked list looking for optimized resync records. - * For each opt record found, re-read in directory block. - * The directoy block consists of a number of directory - * entries. The directory entry for this opt record will - * describe which 2 mddbs actually contain the resync record - * since it could have been relocated by the master node - * due to mddb failure or mddb deletion. If this node - * is the record owner for this opt record, then write out - * the record to the 2 mddbs listed in the directory entry - * if the mddbs locations are different than previously known. - */ - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - /* Found an opt record */ - if (dep->de_flags & MDDB_F_OPT) - break; - } - /* If no opt records found, go to next dbp */ - if (dep == NULL) - continue; - - /* - * Reread directory block from disk since - * master could have rewritten in during fixoptrecord. - */ - db32p = (mddb_db32_t *)kmem_zalloc(MDDB_BSIZE, - KM_SLEEP); - create_db32rec(db32p, dbp); - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - - if ((! (lp->l_flags & MDDB_F_ACTIVE)) || - (lp->l_flags & MDDB_F_EMASTER)) - continue; - - err = readblks(s, (caddr_t)db32p, - db32p->db32_blknum, 1, li); - if (err) - continue; - - /* Reverify db; go to next mddb if bad */ - if ((db32p->db32_magic != MDDB_MAGIC_DB) || - (revchk(MDDB_REV_DB, - db32p->db32_revision)) || - (crcchk(db32p, &db32p->db32_checksum, - MDDB_BSIZE, NULL))) { - continue; - } else { - break; - } - } - /* - * If all mddbs are unavailable then panic since - * this slave cannot be allowed to continue out-of-sync - * with the master node. Since the optimized resync - * records are written by all nodes, all nodes must - * stay in sync with the master. - * - * This also handles the case when all storage - * connectivity to a slave node has failed. The - * slave node will send an MDDB_OPTRECERR message to - * the master node when the slave node has been unable - * to write an optimized resync record to both - * designated mddbs. After the master has fixed the - * optimized records to be on available mddbs, the - * MDDB_PARSE message (with the flag MDDB_PARSE_OPTRECS) - * is sent to all slave nodes. If a slave node is - * unable to access any mddb in order to read in the - * relocated optimized resync record, then the slave - * node must panic. - */ - if (li == lbp->lb_loccnt) { - kmem_free((caddr_t)db32p, MDDB_BSIZE); - cmn_err(CE_PANIC, "md: mddb: Node unable to " - "access any SVM state database " - "replicas for diskset %s\n", s->s_setname); - } - /* - * Setup temp copy of linked list of de's. - * Already have an incore copy, but need to walk - * the directory entry list contained in the - * new directory block that was just read in above. - * After finding the directory entry of an opt record - * by walking the incore list, find the corresponding - * entry in the temporary list and then update - * the incore directory entry record with - * the (possibly changed) mddb location stored - * for the optimized resync records. - */ - de32p = (mddb_de32_t *) - ((void *) ((caddr_t) - (&db32p->db32_firstentry) - + sizeof (db32p->db32_firstentry))); - tdep = (mddb_de_ic_t *) - kmem_zalloc(sizeof (mddb_de_ic_t) - - sizeof (mddb_block_t) + - sizeof (mddb_block_t) * - de32p->de32_blkcount, KM_SLEEP); - de32tode(de32p, tdep); - first_dep = tdep; - while (de32p && de32p->de32_next) { - de32p2 = nextentry(de32p); - dep2 = (mddb_de_ic_t *)kmem_zalloc( - sizeof (mddb_de_ic_t) - - sizeof (mddb_block_t) + - sizeof (mddb_block_t) * - de32p2->de32_blkcount, KM_SLEEP); - de32tode(de32p2, dep2); - tdep->de_next = dep2; - tdep = dep2; - de32p = de32p2; - } - - /* Now, walk the incore directory entry list */ - for (dep = dbp->db_firstentry; dep; - dep = dep->de_next) { - if (! (dep->de_flags & MDDB_F_OPT)) - continue; - /* - * Found an opt record in the incore copy. - * Find the corresponding entry in the temp - * list. If anything has changed in the - * opt record info between the incore copy - * and the temp copy, update the incore copy - * and set a flag to writeout the opt record - * to the new mddb locations. - */ - for (tdep = first_dep; tdep; - tdep = tdep->de_next) { - if (dep->de_recid == tdep->de_recid) { - writeout = 0; - /* Check first mddb location */ - if ((dep->de_optinfo[0].o_li != - tdep->de_optinfo[0].o_li) || - (dep->de_optinfo[0]. - o_flags != tdep->de_optinfo - [0].o_flags)) { - dep->de_optinfo[0] = - tdep->de_optinfo[0]; - writeout = 1; - } - /* Check second mddb location */ - if ((dep->de_optinfo[1].o_li != - tdep->de_optinfo[1].o_li) || - (dep->de_optinfo[1]. - o_flags != tdep->de_optinfo - [1].o_flags)) { - dep->de_optinfo[1] = - tdep->de_optinfo[1]; - writeout = 1; - } - /* - * Record owner should rewrite - * it - */ - if ((writeout) && - (dep->de_owner_nodeid == - md_set[mpp->c_setno]. - s_nodeid)) - (void) writeoptrecord(s, - dep); - break; - } - } - } - /* - * Update the incore checksum information for this - * directory block to match the newly read in checksum. - * This should have only changed if the incore and - * temp directory entries differed, but it takes - * more code to do the check than to just update - * the information everytime. - */ - dbp->db_checksum = db32p->db32_checksum; - - /* Now free everything */ - tdep = first_dep; - while (tdep) { - dep2 = tdep->de_next; - kmem_free((caddr_t)tdep, - sizeofde(tdep)); - tdep = dep2; - } - kmem_free((caddr_t)db32p, MDDB_BSIZE); - } - rval = 0; - } -out: - single_thread_end(s); - mddb_setexit_no_parse(s); - return (rval); -} - -int -mddb_block(mddb_block_parm_t *mbp) -{ - mddb_set_t *s; - int err = 0; - md_error_t *ep = &mbp->c_mde; - - if (mbp->c_setno >= md_nsets) - return (EINVAL); - - /* - * If the new_master flag is set for this setno we are in the middle - * of a reconfig cycle, and blocking or unblocking is not needed. - * Hence we can return success immediately - */ - if (md_get_setstatus(mbp->c_setno) & MD_SET_MN_NEWMAS_RC) { - return (0); - } - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((s = mddb_setenter(mbp->c_setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (mddbstatus2error(ep, err, NODEV32, mbp->c_setno)); - } - - if (!(MD_MNSET_SETNO(mbp->c_setno))) { - mddb_setexit_no_parse(s); - return (EINVAL); - } - - single_thread_start(s); - - if (mbp->c_blk_flags & MDDB_BLOCK_PARSE) - md_set_setstatus(mbp->c_setno, MD_SET_MNPARSE_BLK); - - if (mbp->c_blk_flags & MDDB_UNBLOCK_PARSE) - md_clr_setstatus(mbp->c_setno, MD_SET_MNPARSE_BLK); - - single_thread_end(s); - mddb_setexit_no_parse(s); - return (err); -} - -/* - * mddb_optrecfix marks up to 2 mddbs as failed and calls fixoptrecords - * to relocate any optimized resync records to available mddbs. - * This routine is only called on the master node. - * - * Used in a MN diskset when a slave node has failed to write an optimized - * resync record. The failed mddb information is sent to the master node - * so the master can relocate the optimized records, if possible. If the - * failed mddb information has a mddb marked as failed that was previously - * marked active on the master, the master sets its incore mddb state to - * EWRITE and sets the PARSE_LOCBLK flag. The master node then attempts - * to relocate any optimized records on the newly failed mddbs by calling - * fixoptrecords. (fixoptrecords will set the PARSE_OPTRECS flag if any - * optimized records are relocated.) - * - * When mddb_optrecfix is finished, the ioctl exit code will notice the PARSE - * flags and will send a PARSE message to the slave nodes. The PARSE_LOCBLK - * flag causes the slave node to re-read in the locator block from disk. - * The PARSE_OPTRECS flag causes the slave node to re-read in the directory - * blocks and write out any optimized resync records that have been - * relocated to a different mddb. - */ -int -mddb_optrecfix(mddb_optrec_parm_t *mop) -{ - mddb_set_t *s; - int err = 0; - mddb_lb_t *lbp; - mddb_mnlb_t *mnlbp; - mddb_locator_t *lp; - int li; - mddb_mnsidelocator_t *mnslp; - mddb_drvnm_t *dn; - int i, j; - md_replica_recerr_t *recerr; - md_error_t *ep = &mop->c_mde; - int something_changed = 0; - int alc, lc; - int setno; - - setno = mop->c_setno; - if (mop->c_setno >= md_nsets) - return (EINVAL); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((s = mddb_setenter(mop->c_setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (mddbstatus2error(ep, err, NODEV32, mop->c_setno)); - } - - if (!(MD_MNSET_SETNO(mop->c_setno))) { - mddb_setexit(s); - return (EINVAL); - } - - single_thread_start(s); - lbp = s->s_lbp; - mnlbp = (mddb_mnlb_t *)lbp; - - /* - * If slave node has seen an mddb failure, but the master node - * hasn't encountered this failure, mark the mddb as failed on - * the master node and set the something_changed flag to 1. - */ - for (i = 0; i < 2; i++) { - recerr = &mop->c_recerr[i]; - if (recerr->r_flags & MDDB_F_EWRITE) { - li = recerr->r_li; - lp = &lbp->lb_locators[li]; - for (j = 0; j < MD_MNMAXSIDES; j++) { - mnslp = &mnlbp->lb_mnsidelocators[j][li]; - if (mnslp->mnl_sideno == s->s_sideno) - break; - } - /* Do quick check using li */ - if (j != MD_MNMAXSIDES) - dn = &lbp->lb_drvnm[mnslp->mnl_drvnm_index]; - - if ((j != MD_MNMAXSIDES) && - (strncmp(dn->dn_data, recerr->r_driver_name, - MD_MAXDRVNM) == 0) && - (recerr->r_blkno == lp->l_blkno) && - (recerr->r_mnum == mnslp->mnl_mnum)) { - if ((lp->l_flags & MDDB_F_ACTIVE) || - ((lp->l_flags & MDDB_F_EWRITE) == 0)) { - something_changed = 1; - lp->l_flags |= MDDB_F_EWRITE; - lp->l_flags &= ~MDDB_F_ACTIVE; - } - } else { - /* - * Passed in li from slave does not match - * the replica in the master's structures. - * This could have occurred if a delete - * mddb command was running when the - * optimized resync record had a failure. - * Search all replicas for this entry. - * If no match, just ignore. - * If a match, set replica in error. - */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - - for (j = 0; j < MD_MNMAXSIDES; j++) { - mnslp = - &mnlbp-> - lb_mnsidelocators[j][li]; - if (mnslp->mnl_sideno == - s->s_sideno) - break; - } - if (j == MD_MNMAXSIDES) - continue; - - dn = &lbp-> - lb_drvnm[mnslp->mnl_drvnm_index]; - if ((strncmp(dn->dn_data, - recerr->r_driver_name, - MD_MAXDRVNM) == 0) && - (recerr->r_blkno == lp->l_blkno) && - (recerr->r_mnum == - mnslp->mnl_mnum)) { - if ((lp->l_flags & - MDDB_F_ACTIVE) || - ((lp->l_flags & - MDDB_F_EWRITE) == 0)) { - something_changed = 1; - lp->l_flags |= - MDDB_F_EWRITE; - lp->l_flags &= - ~MDDB_F_ACTIVE; - } - break; - } - } - } - } - } - - /* - * If this message changed nothing, then we're done since this - * failure has already been handled. - * If some mddb state has been changed, send a parse message to - * the slave nodes so that the slaves will re-read the locator - * block from disk. - */ - if (something_changed == 0) { - single_thread_end(s); - mddb_setexit(s); - return (0); - } else { - s->s_mn_parseflags |= MDDB_PARSE_LOCBLK; - } - - /* - * Scan replicas setting MD_SET_TOOFEW if - * 50% or more of the mddbs have seen errors. - * Note: Don't call selectreplicas or writeretry - * since these routines may end up setting the ACTIVE flag - * on a failed mddb if the master is able to access the mddb - * but the slave node couldn't. Need to have the ACTIVE flag - * turned off in order to relocate the optimized records to - * mddbs that are (hopefully) available on all nodes. - */ - alc = 0; - lc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - lc++; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - alc++; - } - - /* - * If more than 50% mddbs have failed, then don't relocate opt recs. - * The node sending the mddb failure information will detect TOOFEW - * and will panic when it attempts to re-write the optimized record. - */ - if (alc < ((lc + 1) / 2)) { - md_set_setstatus(setno, MD_SET_TOOFEW); - (void) push_lb(s); - (void) upd_med(s, "mddb_optrecfix(0)"); - single_thread_end(s); - mddb_setexit(s); - return (0); - } - - /* Attempt to relocate optimized records that are on failed mddbs */ - (void) fixoptrecords(s); - - /* Push changed locator block out to disk */ - (void) push_lb(s); - (void) upd_med(s, "mddb_optrecfix(1)"); - - /* Recheck for TOOFEW after writing out locator blocks */ - alc = 0; - lc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - lc++; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - alc++; - } - - /* If more than 50% mddbs have failed, then don't relocate opt recs */ - if (alc < ((lc + 1) / 2)) { - md_set_setstatus(setno, MD_SET_TOOFEW); - single_thread_end(s); - mddb_setexit(s); - return (0); - } - - single_thread_end(s); - mddb_setexit(s); - return (0); -} - -/* - * Check if incore mddb on master node matches ondisk mddb. - * If not, master writes out incore view to all mddbs. - * Have previously verified that master is an owner of the - * diskset (master has snarfed diskset) and that diskset is - * not stale. - * - * Meant to be called during reconfig cycle during change of master. - * Previous master in diskset may have changed the mddb and - * panic'd before relaying information to slave nodes. New - * master node just writes out its incore view of the mddb and - * the replay of the change log will resync all the nodes. - * - * Only supported for MN disksets. - * - * Return values: - * 0 - success - * non-zero - failure - */ -int -mddb_check_write_ioctl(mddb_config_t *info) -{ - int err = 0; - set_t setno = info->c_setno; - mddb_set_t *s; - int li; - mddb_locator_t *lp; - mddb_lb_t *lbp; - mddb_mnlb_t *mnlbp_od; - mddb_ln_t *lnp; - mddb_mnln_t *mnlnp_od; - mddb_db_t *dbp; - mddb_de_ic_t *dep; - int write_out_mddb; - md_error_t *ep = &info->c_mde; - int mddb_err = 0; - int prev_li = 0; - int rval = 0; - int alc, lc; - int mddbs_present = 0; - - /* Verify that setno is in valid range */ - if (setno >= md_nsets) - return (EINVAL); - - if (md_snarf_db_set(MD_LOCAL_SET, ep) != 0) - return (0); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (mddbstatus2error(ep, err, NODEV32, setno)); - } - - /* Calling diskset must be a MN diskset */ - if (!(MD_MNSET_SETNO(setno))) { - mddb_setexit(s); - return (EINVAL); - } - - /* Re-verify that set is not stale */ - if (md_get_setstatus(setno) & MD_SET_STALE) { - mddb_setexit(s); - return (mdmddberror(ep, MDE_DB_STALE, NODEV32, setno)); - } - - lbp = s->s_lbp; - lnp = s->s_lnp; - - /* - * Previous master could have died during the write of data to - * the mddbs so that the ondisk mddbs may not be consistent. - * So, need to check the contents of the first and last active mddb - * to see if the mddbs need to be rewritten. - */ - for (li = 0; li < lbp->lb_loccnt; li++) { - int checkcopy_err; - - lp = &lbp->lb_locators[li]; - /* Find replica that is active */ - if (lp->l_flags & MDDB_F_DELETED) - continue; - mddbs_present = 1; - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (s->s_mbiarray[li] == NULL) - continue; - /* Check locator block */ - mnlbp_od = (mddb_mnlb_t *)kmem_zalloc(dbtob(MDDB_MNLBCNT), - KM_SLEEP); - /* read in on-disk locator block */ - err = readblks(s, (caddr_t)mnlbp_od, 0, lbp->lb_blkcnt, li); - - /* If err, try next mddb */ - if (err) { - kmem_free(mnlbp_od, dbtob(MDDB_MNLBCNT)); - continue; - } - - /* - * We resnarf all changelog entries for this set. - * They may have been altered by the previous master - */ - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep; dep = - dep->de_next) { - if ((dep->de_flags & MDDB_F_CHANGELOG) == 0) { - continue; - } - /* - * This has been alloc'ed while - * joining the set - */ - if (dep->de_rb) { - kmem_free(dep->de_rb, dep->de_recsize); - dep->de_rb = (mddb_rb32_t *)NULL; - } - if (dep->de_rb_userdata) { - kmem_free(dep->de_rb_userdata, - dep->de_reqsize); - dep->de_rb_userdata = (caddr_t)NULL; - } - - err = getrecord(s, dep, li); - if (err) { - /* - * When we see on error while reading - * the changelog entries, we move on - * to the next mddb - */ - err = 1; - break; /* out of inner for-loop */ - } - allocuserdata(dep); - } - if (err) - break; /* out of outer for-loop */ - } - - /* If err, try next mddb */ - if (err) { - kmem_free(mnlbp_od, dbtob(MDDB_MNLBCNT)); - continue; - } - - /* Is incore locator block same as ondisk? */ - if (bcmp((mddb_mnlb_t *)lbp, mnlbp_od, dbtob(MDDB_MNLBCNT)) - == 1) { - write_out_mddb = 1; - kmem_free((caddr_t)mnlbp_od, dbtob(MDDB_MNLBCNT)); - break; - } - - kmem_free((caddr_t)mnlbp_od, dbtob(MDDB_MNLBCNT)); - - /* If lb ok, check locator names */ - mnlnp_od = (mddb_mnln_t *)kmem_zalloc(dbtob(MDDB_MNLNCNT), - KM_SLEEP); - /* read in on-disk locator names */ - err = readblks(s, (caddr_t)mnlnp_od, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, li); - - /* If err, try next mddb */ - if (err) { - kmem_free(mnlnp_od, dbtob(MDDB_MNLNCNT)); - continue; - } - - /* Are incore locator names same as ondisk? */ - if (bcmp((mddb_mnln_t *)lnp, mnlnp_od, dbtob(MDDB_MNLNCNT)) - == 1) { - kmem_free((caddr_t)mnlnp_od, dbtob(MDDB_MNLNCNT)); - write_out_mddb = 1; - break; - } - - kmem_free((caddr_t)mnlnp_od, dbtob(MDDB_MNLNCNT)); - - /* - * Check records in mddb. - * If a read error is encountered, set the error flag and - * continue to the next mddb. Otherwise, if incore data is - * different from ondisk, then set the flag to write out - * the mddb and break out. - */ - checkcopy_err = checkcopy(s, li); - if (checkcopy_err == MDDB_F_EREAD) { - lp->l_flags |= MDDB_F_EREAD; - mddb_err = 1; - continue; - } else if (checkcopy_err == 1) { - write_out_mddb = 1; - break; - } - /* - * Have found first active mddb and the data is the same as - * incore - break out of loop - */ - write_out_mddb = 0; - break; - } - - /* - * Skip checking for last active mddb if: - * - already found a mismatch in the first active mddb - * (write_out_mddb is 1) OR - * - didn't find a readable mddb when looking for first - * active mddb (there are mddbs present but all failed - * when read was attempted). - * - * In either case, go to write_out_mddb label in order to attempt - * to write out the data. If < 50% mddbs are available, panic. - */ - if ((write_out_mddb == 1) || - ((li == lbp->lb_loccnt) && mddbs_present)) { - write_out_mddb = 1; - goto write_out_mddb; - } - - /* - * Save which index was checked for the first active mddb. If only 1 - * active mddb, don't want to recheck the same mddb when looking for - * last active mddb. - */ - prev_li = li; - - /* - * Now, checking for last active mddb. If found same index as before - * (only 1 active mddb), then skip. - */ - for (li = (lbp->lb_loccnt - 1); li >= 0; li--) { - int checkcopy_err; - - lp = &lbp->lb_locators[li]; - /* Find replica that is active */ - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (s->s_mbiarray[li] == NULL) - continue; - /* If already checked mddb, bail out */ - if (li == prev_li) - break; - /* Check locator block */ - mnlbp_od = (mddb_mnlb_t *)kmem_zalloc(dbtob(MDDB_MNLBCNT), - KM_SLEEP); - /* read in on-disk locator block */ - err = readblks(s, (caddr_t)mnlbp_od, 0, lbp->lb_blkcnt, li); - - /* If err, try next mddb */ - if (err) { - kmem_free(mnlbp_od, dbtob(MDDB_MNLBCNT)); - continue; - } - - - /* Is incore locator block same as ondisk? */ - if (bcmp((mddb_mnlb_t *)lbp, mnlbp_od, dbtob(MDDB_MNLBCNT)) - == 1) { - kmem_free((caddr_t)mnlbp_od, dbtob(MDDB_MNLBCNT)); - write_out_mddb = 1; - break; - } - - kmem_free((caddr_t)mnlbp_od, dbtob(MDDB_MNLBCNT)); - - /* If lb ok, check locator names */ - mnlnp_od = (mddb_mnln_t *) - kmem_zalloc(dbtob(MDDB_MNLNCNT), KM_SLEEP); - - /* read in on-disk locator names */ - err = readblks(s, (caddr_t)mnlnp_od, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, li); - - /* If err, try next mddb */ - if (err) { - kmem_free(mnlnp_od, dbtob(MDDB_MNLNCNT)); - continue; - } - - /* Are incore locator names same as ondisk? */ - if (bcmp((mddb_mnln_t *)lnp, mnlnp_od, dbtob(MDDB_MNLNCNT)) - == 1) { - kmem_free((caddr_t)mnlnp_od, dbtob(MDDB_MNLNCNT)); - write_out_mddb = 1; - break; - } - - kmem_free((caddr_t)mnlnp_od, dbtob(MDDB_MNLNCNT)); - - /* - * Check records in mddb. - * If a read error is encountered, set the error flag and - * continue to the next mddb. Otherwise, if incore data is - * different from ondisk, then set the flag to write out - * the mddb and break out. - */ - checkcopy_err = checkcopy(s, li); - if (checkcopy_err == MDDB_F_EREAD) { - lp->l_flags |= MDDB_F_EREAD; - mddb_err = 1; - continue; - } else if (checkcopy_err == 1) { - write_out_mddb = 1; - break; - } - /* - * Have found last active mddb and the data is the same as - * incore - break out of loop - */ - write_out_mddb = 0; - break; - } - - /* - * If ondisk and incore versions of the mddb don't match, then - * write out this node's incore version to disk. - * Or, if unable to read a copy of the mddb, attempt to write - * out a new one. - */ -write_out_mddb: - if (write_out_mddb) { - /* Recompute free blocks based on incore information */ - computefreeblks(s); /* set up free block bits */ - - /* - * Write directory entries and record blocks. - * Use flag MDDB_WRITECOPY_SYNC so that writecopy - * routine won't write out change log records. - */ - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - /* Don't write to inactive or deleted mddbs */ - if (! (lp->l_flags & MDDB_F_ACTIVE)) - continue; - if (lp->l_flags & MDDB_F_DELETED) - continue; - if (s->s_mbiarray[li] == NULL) - continue; - /* If encounter a write error, save it for later */ - if (writecopy(s, li, MDDB_WRITECOPY_SYNC)) { - lp->l_flags |= MDDB_F_EWRITE; - mddb_err = 1; - } - } - - /* - * Write out locator blocks to all replicas. - * push_lb will set MDDB_F_EWRITE on replicas that fail. - */ - if (push_lb(s)) - mddb_err = 1; - (void) upd_med(s, "mddb_check_write_ioctl(0)"); - - /* Write out locator names to all replicas */ - lnp = s->s_lnp; - uniqtime32(&lnp->ln_timestamp); - lnp->ln_revision = MDDB_REV_MNLN; - crcgen(lnp, &lnp->ln_checksum, dbtob(lbp->lb_lnblkcnt), NULL); - - /* writeall sets MDDB_F_EWRITE if writes fails to replica */ - if (writeall(s, (caddr_t)lnp, lbp->lb_lnfirstblk, - lbp->lb_lnblkcnt, 0)) - mddb_err = 1; - - /* - * The writes to the replicas above would have set - * the MDDB_F_EWRITE flags if any write error was - * encountered. - * If < 50% of the mddbs are available, panic. - */ - lc = alc = 0; - for (li = 0; li < lbp->lb_loccnt; li++) { - lp = &lbp->lb_locators[li]; - if (lp->l_flags & MDDB_F_DELETED) - continue; - lc++; - /* - * If mddb: - * - is not active (previously had an error) - * - had an error reading the master blocks or - * - had an error in writing to the mddb - * then don't count this mddb in the active count. - */ - if (! (lp->l_flags & MDDB_F_ACTIVE) || - (lp->l_flags & MDDB_F_EMASTER) || - (lp->l_flags & MDDB_F_EWRITE)) - continue; - alc++; - } - if (alc < ((lc + 1) / 2)) { - cmn_err(CE_PANIC, - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } - } - - /* - * If encountered an error during checking or writing of - * mddbs, call selectreplicas so that replica error can - * be properly handled. This will involve another attempt - * to write the mddb out to any mddb marked MDDB_F_EWRITE. - * If mddb still fails, it will have the MDDB_F_ACTIVE bit - * turned off. Set the MDDB_SCANALLSYNC flag so that - * selectreplicas doesn't overwrite the change log entries. - * - * Set the PARSE_LOCBLK flag in the mddb_set structure to show - * that the locator block has been changed. - */ - if (mddb_err) { - (void) selectreplicas(s, MDDB_SCANALLSYNC); - s->s_mn_parseflags |= MDDB_PARSE_LOCBLK; - } - -write_out_end: - mddb_setexit(s); - return (rval); -} - -/* - * Set/reset/get set flags in set structure. - * Used during reconfig cycle - * Only supported for MN disksets. - * - * Return values: - * 0 - success - * non-zero - failure - */ -int -mddb_setflags_ioctl(mddb_setflags_config_t *info) -{ - set_t setno = info->sf_setno; - - /* Verify that setno is in valid range */ - if (setno >= md_nsets) - return (EINVAL); - - /* - * When setting the flags, the set may not - * be snarfed yet. So, don't check for SNARFED or MNset - * and don't call mddb_setenter. - * In order to discourage bad ioctl calls, - * verify that magic field in structure is set correctly. - */ - if (info->sf_magic != MDDB_SETFLAGS_MAGIC) - return (EINVAL); - - switch (info->sf_flags) { - case MDDB_NM_SET: - if (info->sf_setflags & MD_SET_MN_NEWMAS_RC) - md_set_setstatus(setno, MD_SET_MN_NEWMAS_RC); - if (info->sf_setflags & MD_SET_MN_START_RC) - md_set_setstatus(setno, MD_SET_MN_START_RC); - if (info->sf_setflags & MD_SET_MN_MIR_STATE_RC) - md_set_setstatus(setno, MD_SET_MN_MIR_STATE_RC); - break; - - case MDDB_NM_RESET: - if (info->sf_setflags & MD_SET_MN_NEWMAS_RC) - md_clr_setstatus(setno, MD_SET_MN_NEWMAS_RC); - if (info->sf_setflags & MD_SET_MN_START_RC) - md_clr_setstatus(setno, MD_SET_MN_START_RC); - if (info->sf_setflags & MD_SET_MN_MIR_STATE_RC) - md_clr_setstatus(setno, MD_SET_MN_MIR_STATE_RC); - break; - - case MDDB_NM_GET: - info->sf_setflags = md_get_setstatus(setno) & - (MD_SET_MN_NEWMAS_RC|MD_SET_MN_START_RC| - MD_SET_MN_MIR_STATE_RC); - break; - } - - return (0); -} - -/* - * md_update_minor - * - * This function updates the minor in the namespace entry for an - * underlying metadevice. The function is called in mod_imp_set - * where mod is sp, stripe, mirror and raid. - * - */ -int -md_update_minor( - set_t setno, - side_t side, - mdkey_t key -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - char *shn; - int retval = 1; - side_t s; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (0); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - retval = 0; - goto out; - } - - /* - * Look up the key - */ - for (s = 0; s < MD_MAXSIDES; s++) { - /* - * For side other than the import 'side', cleanup its entry - */ - if ((n = lookup_entry(nh, setno, s, key, NODEV64, 0L)) != - NULL) { - if (n->n_side == side) { - /* - * Update its n_minor if metadevice - */ - if (((shn = (char *)getshared_name(setno, - n->n_drv_key, 0L)) != NULL) && - (strcmp(shn, "md") == 0)) { - n->n_minor = MD_MKMIN(setno, - MD_MIN2UNIT(n->n_minor)); - } - } else { - /* We are not the import side, cleanup */ - (void) remove_entry(nh, n->n_side, key, 0L); - } - } - } - -out: - rw_exit(&nm_lock.lock); - return (retval); -} - -/* - * md_update_top_device_minor - * - * This function updates the minor in the namespace entry for a top - * level metadevice. The function is called in mod_imp_set where - * mod is sp, stripe, mirror and raid. - * - */ -int -md_update_top_device_minor( - set_t setno, - side_t side, - md_dev64_t dev -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - char *shn; - int retval = 1; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (0); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - retval = 0; - goto out; - } - - /* - * Look up the key - */ - if ((n = lookup_entry(nh, setno, side, MD_KEYWILD, dev, 0L)) != NULL) { - /* - * Find the entry, update its n_minor if metadevice - */ - if ((shn = (char *)getshared_name(setno, n->n_drv_key, 0L)) - == NULL) { - retval = 0; - goto out; - } - - if (strcmp(shn, "md") == 0) { - n->n_minor = MD_MKMIN(setno, MD_MIN2UNIT(n->n_minor)); - } - } - -out: - rw_exit(&nm_lock.lock); - return (retval); -} - -static void -md_imp_nm( - mddb_set_t *s -) -{ - mddb_db_t *dbp; - mddb_de_ic_t *dep; - struct nm_rec_hdr *hdr; - struct nm_header *hhdr; - set_t setno = s->s_setno; - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep != NULL; - dep = dep->de_next) { - switch (dep->de_type1) { - - case MDDB_NM_HDR: - case MDDB_DID_NM_HDR: - - hhdr = (struct nm_header *) - dep->de_rb_userdata; - - hdr = &hhdr->h_names; - if (hdr->r_next_recid > 0) { - hdr->r_next_recid = MAKERECID(setno, - DBID(hdr->r_next_recid)); - } - - hdr = &hhdr->h_shared; - if (hdr->r_next_recid > 0) { - hdr->r_next_recid = MAKERECID(setno, - DBID(hdr->r_next_recid)); - } - break; - - case MDDB_NM: - case MDDB_DID_NM: - case MDDB_SHR_NM: - case MDDB_DID_SHR_NM: - - hdr = (struct nm_rec_hdr *) - dep->de_rb_userdata; - - if (hdr->r_next_recid > 0) { - hdr->r_next_recid = MAKERECID - (setno, DBID(hdr->r_next_recid)); - } - break; - - default: - break; - } - } - } -} - -static int -update_db_rec( - mddb_set_t *s -) -{ - mddb_db_t *dbp; - mddb_de_ic_t *dep; - mddb_recid_t ids[2]; - - for (dbp = s->s_dbp; dbp != NULL; dbp = dbp->db_next) { - for (dep = dbp->db_firstentry; dep != NULL; - dep = dep->de_next) { - if (! (dep->de_flags & MDDB_F_OPT)) { - ids[0] = MAKERECID(s->s_setno, dep->de_recid); - ids[1] = 0; - if (mddb_commitrecs(ids)) { - return (MDDB_E_NORECORD); - } - } - } - } - return (0); -} - -static int -update_mb( - mddb_set_t *s -) -{ - mddb_ri_t *rip; - int err = 0; - - for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) { - if (rip->ri_flags & MDDB_F_EMASTER) - /* disk is powered off or not there */ - continue; - - if (md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT) { - /* - * It is a replicated set - */ - if (rip->ri_devid == (ddi_devid_t)NULL) { - return (-1); - } - err = update_mb_devid(s, rip, rip->ri_devid); - } else { - /* - * It is a non-replicated set - * and there is no need to update - * devid - */ - err = update_mb_devid(s, rip, NULL); - } - - if (err) - return (err); - } - - return (0); -} - -static int -update_setname( - set_t setno -) -{ - struct nm_next_hdr *nh; - struct nm_shared_name *shn, *new_shn; - char *prefix = "/dev/md/"; - char *shrname; - int len; - mdkey_t o_key; - uint32_t o_count, o_data; - mddb_recid_t recid, ids[3]; - int err = 0; - mddb_set_t *dbp; - - /* Import setname */ - dbp = (mddb_set_t *)md_set[setno].s_db; - len = strlen(prefix) + strlen(dbp->s_setname) + strlen("/dsk/") + 1; - shrname = kmem_zalloc(len, KM_SLEEP); - (void) sprintf(shrname, "%s%s%s", prefix, dbp->s_setname, "/dsk/"); - - rw_enter(&nm_lock.lock, RW_WRITER); - if ((nh = get_first_record(setno, 0, NM_SHARED)) == NULL) { - /* - * No namespace is okay - */ - err = 0; - goto out; - } - - if ((shn = (struct nm_shared_name *)lookup_shared_entry(nh, - 0, prefix, NULL, NM_SHARED | NM_IMP_SHARED)) == NULL) { - /* - * No metadevice is okay - */ - err = 0; - goto out; - } - - /* - * We have it, go ahead and update the namespace. - */ - o_key = shn->sn_key; - o_count = shn->sn_count; - o_data = shn->sn_data; - - if (remove_shared_entry(nh, o_key, NULL, 0L | NM_IMP_SHARED | - NM_NOCOMMIT | NM_KEY_RECYCLE)) { - err = MDDB_E_NORECORD; - goto out; - } - if ((new_shn = (struct nm_shared_name *)alloc_entry( - nh, md_set[setno].s_nmid, len, NM_SHARED | - NM_NOCOMMIT, &recid)) == NULL) { - err = MDDB_E_NORECORD; - goto out; - } - - new_shn->sn_key = o_key; - new_shn->sn_count = o_count; - new_shn->sn_data = o_data; - new_shn->sn_namlen = (ushort_t)len; - (void) strcpy(new_shn->sn_name, shrname); - - ids[0] = recid; - ids[1] = md_set[setno].s_nmid; - ids[2] = 0; - err = mddb_commitrecs(ids); - -out: - if (shrname) - kmem_free(shrname, len); - rw_exit(&nm_lock.lock); - return (err); -} - -/* - * Returns 0 on success. - * Returns -1 on failure with ep filled in. - */ -static int -md_imp_db( - set_t setno, - int stale_flag, - md_error_t *ep -) -{ - mddb_set_t *s; - int err = 0; - mddb_dt_t *dtp; - mddb_lb_t *lbp; - int i; - int loccnt; - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) { - return (mddbstatus2error(ep, err, NODEV32, setno)); - } - - /* Update dt */ - if ((dtp = (mddb_dt_t *)md_set[setno].s_dtp) != NULL) { - crcgen(dtp, &dtp->dt_cks, MDDB_DT_BYTES, NULL); - } - - if ((err = dt_write(s)) != 0) { - err = mdsyserror(ep, err); - mddb_setexit(s); - return (err); - } - - /* - * Update lb, no need to update the mediator because - * the diskset will only exist on the importing node - * and as such a mediator adds no value. - */ - - /* Update lb */ - if (stale_flag & MD_IMP_STALE_SET) { - lbp = s->s_lbp; - loccnt = lbp->lb_loccnt; - for (i = 0; i < loccnt; i++) { - mddb_locator_t *lp = &lbp->lb_locators[i]; - md_dev64_t ndev = md_expldev(lp->l_dev); - ddi_devid_t devid_ptr; - - devid_ptr = s->s_did_icp->did_ic_devid[i]; - if (devid_ptr == NULL) { - /* - * Already deleted, go to next one. - */ - continue; - } - if (mddb_devid_validate((ddi_devid_t)devid_ptr, &ndev, - NULL)) { - /* disk unavailable, mark deleted */ - lp->l_flags = MDDB_F_DELETED; - /* then remove the device id from the list */ - free_mbipp(&s->s_mbiarray[i]); - (void) mddb_devid_delete(s, i); - } - } - md_clr_setstatus(setno, MD_SET_STALE); - } - - if ((err = writelocall(s)) != 0) { - err = mdmddberror(ep, MDDB_E_NOTNOW, NODEV32, setno); - mddb_setexit(s); - return (err); - } - - mddb_setexit(s); - - /* Update db records */ - if ((err = update_db_rec(s)) != 0) { - return (mddbstatus2error(ep, err, NODEV32, setno)); - } - - /* Update setname embedded in the namespace */ - if ((err = update_setname(setno)) != 0) - return (mddbstatus2error(ep, err, NODEV32, setno)); - - return (err); -} - -static void -md_dr_add( - md_set_record *sr, - md_drive_record *dr -) -{ - md_drive_record *drv; - - if (sr->sr_driverec == 0) { - sr->sr_driverec = dr->dr_selfid; - return; - } - - for (drv = (md_drive_record *)mddb_getrecaddr(sr->sr_driverec); - drv->dr_nextrec != 0; - drv = (md_drive_record *)mddb_getrecaddr(drv->dr_nextrec)) - ; - drv->dr_nextrec = dr->dr_selfid; -} - -static void -md_setup_recids( - md_set_record *sr, - mddb_recid_t **ids, - size_t size -) -{ - md_drive_record *drv; - int cnt; - mddb_recid_t *recids; - - recids = (mddb_recid_t *)kmem_zalloc(sizeof (mddb_recid_t) - * size, KM_SLEEP); - recids[0] = sr->sr_selfid; - cnt = 1; - - for (drv = (md_drive_record *)mddb_getrecaddr(sr->sr_driverec); - /* CSTYLED */ - drv != NULL;) { - recids[cnt++] = drv->dr_selfid; - if (drv->dr_nextrec != 0) - drv = (md_drive_record *)mddb_getrecaddr - (drv->dr_nextrec); - else - drv = NULL; - } - recids[cnt] = 0; - *ids = &recids[0]; -} - -/* - * The purpose of this function is to replace the old_devid with the - * new_devid in the given namespace. This is used for importing - * remotely replicated drives. - */ -int -md_update_namespace_rr_did( - mddb_config_t *cp -) -{ - set_t setno = cp->c_setno; - struct nm_next_hdr *nh; - mdkey_t key = MD_KEYWILD; - side_t side = MD_SIDEWILD; - mddb_recid_t recids[3]; - struct did_min_name *n; - struct nm_next_hdr *did_shr_nh; - struct did_shr_name *shr_n; - mdkey_t ent_did_key; - uint32_t ent_did_count; - uint32_t ent_did_data; - ddi_devid_t devid = NULL; - struct did_shr_name *shn; - void *old_devid, *new_devid; - - if (!(md_get_setstatus(setno) & MD_SET_NM_LOADED)) - return (EIO); - - old_devid = (void *)(uintptr_t)cp->c_locator.l_old_devid; - new_devid = (void *)(uintptr_t)cp->c_locator.l_devid; - - /* - * It is okay if we dont have any configuration - */ - if ((nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) - == NULL) { - return (0); - } - while ((key = md_getnextkey(setno, side, key, NULL)) != MD_KEYWILD) { - /* check out every entry in the namespace */ - if ((n = (struct did_min_name *)lookup_entry(nh, setno, - side, key, NODEV64, NM_DEVID)) == NULL) { - continue; - } else { - did_shr_nh = get_first_record(setno, 0, NM_DEVID | - NM_SHARED); - if (did_shr_nh == NULL) { - return (ENOENT); - } - - shr_n = (struct did_shr_name *)lookup_shared_entry( - did_shr_nh, n->min_devid_key, (char *)0, - &recids[0], NM_DEVID); - if (shr_n == NULL) { - return (ENOENT); - } - rw_enter(&nm_lock.lock, RW_WRITER); - devid = (ddi_devid_t)shr_n->did_devid; - /* find this devid in the incore replica */ - if (ddi_devid_compare(devid, old_devid) == 0) { - /* - * found the corresponding entry - * update with new devid - */ - /* first remove old devid info */ - ent_did_key = shr_n ->did_key; - ent_did_count = shr_n->did_count; - ent_did_data = shr_n->did_data; - (void) remove_shared_entry(did_shr_nh, - shr_n->did_key, NULL, NM_DEVID | - NM_IMP_SHARED | NM_KEY_RECYCLE); - - /* add in new devid info */ - if ((shn = (struct did_shr_name *) - alloc_entry(did_shr_nh, - md_set[setno].s_did_nmid, - cp->c_locator.l_devid_sz, - NM_DEVID | NM_SHARED | NM_NOCOMMIT, - &recids[0])) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOMEM); - } - shn->did_key = ent_did_key; - shn->did_count = ent_did_count; - ent_did_data |= NM_DEVID_VALID; - shn->did_data = ent_did_data; - shn->did_size = ddi_devid_sizeof( - new_devid); - bcopy((void *)new_devid, (void *) - shn->did_devid, shn->did_size); - recids[1] = md_set[setno].s_nmid; - recids[2] = 0; - mddb_commitrecs_wrapper(recids); - } - rw_exit(&nm_lock.lock); - } - } - - return (0); -} - -/* - * namespace is loaded before this is called. - * This function is a wrapper for md_update_namespace_rr_did. - * - * md_update_namespace_rr_did may be called twice if attempting to - * resolve a replicated device id during the take of a diskset - once - * for the diskset namespace and a second time for the local namespace. - * The local namespace would need to be updated when a drive has been - * found during a take of the diskset that hadn't been resolved during - * the import (aka partial replicated import). - * - * If being called during the import of the diskset (IMPORT flag set) - * md_update_namespace_rr_did will only be called once with the disket - * namespace. - */ -int -md_update_nm_rr_did_ioctl( - mddb_config_t *cp -) -{ - int rval = 0; - - /* If update of diskset namespace fails, stop and return failure */ - if ((rval = md_update_namespace_rr_did(cp)) != 0) - return (rval); - - if (cp->c_flags & MDDB_C_IMPORT) - return (0); - - /* If update of local namespace fails, return failure */ - cp->c_setno = MD_LOCAL_SET; - rval = md_update_namespace_rr_did(cp); - return (rval); -} - -/*ARGSUSED*/ -int -md_imp_snarf_set( - mddb_config_t *cp -) -{ - set_t setno; - int stale_flag; - mddb_set_t *s; - int i, err = 0; - md_ops_t *ops; - md_error_t *ep = &cp->c_mde; - - setno = cp->c_setno; - stale_flag = cp->c_flags; - - mdclrerror(ep); - if (setno >= md_nsets) { - return (mdsyserror(ep, EINVAL)); - } - - md_haltsnarf_enter(setno); - if (md_get_setstatus(setno) & MD_SET_IMPORT) { - goto out; - } - - /* Set the bit first otherwise load_old_replicas can fail */ - md_set_setstatus(setno, MD_SET_IMPORT); - - if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) { - err = mddbstatus2error(ep, err, NODEV32, setno); - goto out; - } - - /* - * Upon completion of load_old_replicas, the old setno is - * restored from the disk so we need to reset - */ - s->s_lbp->lb_setno = setno; - - /* - * Fixup the NM records before loading namespace - */ - (void) md_imp_nm(s); - mddb_setexit(s); - - /* - * Load the devid name space if it exists - * and ask each module to fixup unit records - */ - if (!md_load_namespace(setno, NULL, NM_DEVID)) { - err = mdsyserror(ep, ENOENT); - goto cleanup; - } - if (!md_load_namespace(setno, NULL, 0L)) { - (void) md_unload_namespace(setno, NM_DEVID); - err = mdsyserror(ep, ENOENT); - goto cleanup; - } - - do { - i = 0; - for (ops = md_opslist; ops != NULL; ops = ops->md_next) - if (ops->md_imp_set != NULL) - i += ops->md_imp_set(setno); - } while (i); - - /* - * Fixup - * (1) locator block - * (2) locator name block if necessary - * (3) master block - * (4) directory block - * calls appropriate writes to push changes out - */ - if ((err = md_imp_db(setno, stale_flag, ep)) != 0) { - goto cleanup; - } - - /* - * Don't unload namespace if importing a replicated diskset. - * Namespace will be unloaded with an explicit RELEASE_SET ioctl. - */ - if (md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT) { - md_haltsnarf_exit(setno); - return (err); - } - -cleanup: - /* - * Halt the set - */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - (void) md_halt_set(setno, MD_HALT_ALL); - rw_exit(&md_unit_array_rw.lock); - - /* - * Unload the namespace for the imported set - */ - mutex_enter(&mddb_lock); - mddb_unload_set(setno); - mutex_exit(&mddb_lock); - -out: - md_haltsnarf_exit(setno); - md_clr_setstatus(setno, MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT); - return (err); -} -#endif /* MDDB_FAKE */ diff --git a/usr/src/uts/common/io/lvm/md/md_med.c b/usr/src/uts/common/io/lvm/md/md_med.c deleted file mode 100644 index 4f3f5765638a..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_med.c +++ /dev/null @@ -1,1790 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -/* #include */ -/* #include */ -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include /* PMAPPORT */ -#include -#include -#include /* This also gets us htonl() et al. */ - - -#include - -#define MDDB -#include -#include -#include -#include - -/* - * Flag to turn off the kernel RPC client delay code. This only takes effect - * if the route to the remote node is marked as RTF_REJECT and the RPC path - * manager has been flushed such that any 'old' path information is no longer - * present. - */ -static bool_t clset = TRUE; - -extern int md_nmedh; /* declared in md.c */ -extern char *md_med_trans_lst; -extern md_set_t md_set[]; /* declared in md.c */ - -/* - * Structures used only by mediators - */ -typedef struct med_thr_a_args { - uint_t mtaa_mag; - char *mtaa_h_nm; - in_addr_t mtaa_h_ip; - uint_t mtaa_h_flags; - int (*mtaa_err_func)(struct med_thr_a_args *); - struct med_thr_h_args *mtaa_mthap; - int mtaa_flags; - rpcprog_t mtaa_prog; - rpcvers_t mtaa_vers; - rpcproc_t mtaa_proc; - xdrproc_t mtaa_inproc; - caddr_t mtaa_in; - xdrproc_t mtaa_outproc; - caddr_t mtaa_out; - struct timeval *mtaa_timout; - int mtaa_err; -} med_thr_a_args_t; - -#define MTAA_MAGIC 0xbadbabed -#define MDT_A_OK 0x00000001 - -typedef struct med_thr_h_args { - uint_t mtha_mag; - md_hi_t *mtha_mhp; - char *mtha_setname; - med_data_t *mtha_meddp; - struct med_thr *mtha_mtp; - int mtha_flags; - set_t mtha_setno; - int mtha_a_cnt; - kcondvar_t mtha_a_cv; - kmutex_t mtha_a_mx; - uint_t mtha_a_nthr; - med_thr_a_args_t mtha_a_args[MAX_HOST_ADDRS]; -} med_thr_h_args_t; - -#define MTHA_MAGIC 0xbadbabee -#define MDT_H_OK 0x00000001 - -typedef struct med_thr { - uint_t mt_mag; - kmutex_t mt_mx; - kcondvar_t mt_cv; - uint_t mt_nthr; - med_thr_h_args_t *mt_h_args[MED_MAX_HOSTS]; -} med_thr_t; - -#define MTH_MAGIC 0xbadbabef - -#ifdef DEBUG - -static struct timeval btv; -static struct timeval etv; - -#define DBGLVL_NONE 0x00000000 -#define DBGLVL_MAJOR 0x00000100 -#define DBGLVL_MINOR 0x00000200 -#define DBGLVL_MINUTE 0x00000400 -#define DBGLVL_TRIVIA 0x00000800 -#define DBGLVL_HIDEOUS 0x00001000 - -#define DBGFLG_NONE 0x00000000 -#define DBGFLG_NOPANIC 0x00000001 -#define DBGFLG_LVLONLY 0x00000002 -#define DBGFLG_FIXWOULDPANIC 0x00000004 - -#define DBGFLG_FLAGMASK 0x0000000F -#define DBGFLG_LEVELMASK ~DBGFLG_FLAGMASK - -#define DEBUG_FLAGS (md_medup_failure_dbg & DBGFLG_FLAGMASK) -#define DEBUG_LEVEL (md_medup_failure_dbg & DBGFLG_LEVELMASK) - -#ifdef JEC -unsigned int md_medup_failure_dbg = DBGLVL_MINOR | DBGFLG_NONE; -#else /* ! JEC */ -unsigned int md_medup_failure_dbg = DBGLVL_NONE | DBGFLG_NONE; -#endif /* JEC */ - -#define DCALL(dbg_level, call) \ - { \ - if (DEBUG_LEVEL != DBGLVL_NONE) { \ - if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \ - if (DEBUG_LEVEL & dbg_level) { \ - call; \ - } \ - } else { \ - if (dbg_level <= DEBUG_LEVEL) { \ - call; \ - } \ - } \ - } \ - } - -#define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg) - -#define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg) -#define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg) -#define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg) -#define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg) -#define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg) -#define BSTAMP { uniqtime(&btv); } - -#define ESTAMP(msg) \ - { \ - time_t esec, eusec; \ - \ - uniqtime(&etv); \ - \ - eusec = etv.tv_usec - btv.tv_usec; \ - esec = etv.tv_sec - btv.tv_sec; \ - if (eusec < 0) { \ - eusec += MICROSEC; \ - esec--; \ - } \ - MINOR(("%s: sec=%ld, usec=%ld\n", msg, esec, eusec)); \ - } - -#else /* ! DEBUG */ - -#define DCALL(ignored_dbg_level, ignored_routine) -#define MAJOR(ignored) -#define MINOR(ignored) -#define MINUTE(ignored) -#define TRIVIA(ignored) -#define HIDEOUS(ignored) -#define BSTAMP { } -#define ESTAMP(msg) { } - -#endif /* DEBUG */ - -static int md_med_protocol_retry = 2; -static int md_med_transdevs_set = 0; - -/* - * Definitions and declarations. - */ -kmutex_t med_lck; - -struct med_client { - rpcprog_t prog; - rpcvers_t vers; - struct netbuf addr; /* Address to this */ - CLIENT *client; -}; - -/* - * unrecoverable RPC status codes; cf. rfscall() - */ -#define MED_IS_UNRECOVERABLE_RPC(s) (((s) == RPC_AUTHERROR) || \ - ((s) == RPC_CANTENCODEARGS) || \ - ((s) == RPC_CANTDECODERES) || \ - ((s) == RPC_VERSMISMATCH) || \ - ((s) == RPC_PROCUNAVAIL) || \ - ((s) == RPC_PROGUNAVAIL) || \ - ((s) == RPC_PROGVERSMISMATCH) || \ - ((s) == RPC_CANTDECODEARGS)) - -/* - * When trying to contact a portmapper that doesn't speak the version we're - * using, we should theoretically get back RPC_PROGVERSMISMATCH. - * Unfortunately, some (all?) 4.x hosts return an accept_stat of - * PROG_UNAVAIL, which gets mapped to RPC_PROGUNAVAIL, so we have to check - * for that, too. - */ -#define PMAP_WRONG_VERSION(s) ((s) == RPC_PROGVERSMISMATCH || \ - (s) == RPC_PROGUNAVAIL) - -#define NULLSTR(str) (! (str) || *(str) == '\0'? "" : (str)) -#define NULSTRING "" - -/* Flags used in med_addr (netconfig) table */ - -#define UAFLG_NONE 0x00000000 -#define UAFLG_SKIP 0x00000001 -#define UAFLG_ERROR 0x00000002 -#define UAFLG_RPCERROR 0x00000004 -#define UAFLG_LOOPBACK 0x00000008 -#define UAFLG_LOCKINIT 0x00000010 - -/* - * most of this data is static. The mutex protects the changable items: - * ua_flags - */ -static struct med_addr { - struct knetconfig ua_kn; - char *ua_devname; /* const */ - char *ua_netid; /* const */ - uint_t ua_flags; - kmutex_t ua_mutex; -} med_addr_tab[] = - -/* - * The order of the entries in this table is the order in - * which we'll try to connect to the user-level daemon. - * The final entry must have a NULL ua_devname. - * - * This is basically a tablified version of /etc/netconfig - * (with additional entries for loopback TCP and UDP networks - * that are missing from the user-level version.) - */ -{ - -/* loopback UDP */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV }, - /* devname netid flags */ - "/dev/udp", "udp-loopback", UAFLG_LOOPBACK -}, - -/* UDP */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV }, - /* devname netid flags */ - "/dev/udp", "udp", UAFLG_NONE -}, - -/* loopback TCP */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV }, - /* devname netid flags */ - "/dev/tcp", "tcp-loopback", UAFLG_LOOPBACK -}, - -/* TCP */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV }, - /* devname netid flags */ - "/dev/tcp", "tcp", UAFLG_NONE -}, - -/* ticlts */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV }, - /* devname netid flags */ - "/dev/ticlts", "ticlts", UAFLG_LOOPBACK -}, - -/* ticotsord */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV }, - /* devname netid flags */ - "/dev/ticotsord", "ticotsord", UAFLG_LOOPBACK -}, - -/* ticots */ - /* semantics protofmly proto, dev_t */ -{ { NC_TPI_COTS, NC_LOOPBACK, NC_NOPROTO, NODEV }, - /* devname netid flags */ - "/dev/ticots", "ticots", UAFLG_LOOPBACK -} -}; - -/* The number of entries in the table */ -int med_addr_tab_nents = sizeof (med_addr_tab) / sizeof (med_addr_tab[0]); - -/* - * Private Functions - */ - -/* A useful utility. */ -static char * -med_dup(void *str, int len) -{ - char *s = (char *)kmem_zalloc(len, KM_SLEEP); - - if (s == NULL) - return (NULL); - - bcopy(str, s, len); - - return (s); -} - -/* - * Utilities for manipulating netbuf's. - * These utilities are the only knc_protofmly specific functions in the MED. - */ - -/* - * Utilities to patch a port number (for NC_INET protocols) or a - * port name (for NC_LOOPBACK) into a network address. - */ -static void -med_put_inet_port(struct netbuf *addr, ushort_t port) -{ - /* - * Easy - we always patch an unsigned short on top of an - * unsigned short. No changes to addr's len or maxlen are - * necessary. - */ - /*LINTED*/ - ((struct sockaddr_in *)(addr->buf))->sin_port = port; -} - -static void -med_put_loopback_port(struct netbuf *addr, char *port) -{ - char *dot; - char *newbuf; - int newlen; - - /* - * We must make sure the addr has enough space for us, - * patch in `port', and then adjust addr's len and maxlen - * to reflect the change. - */ - if ((dot = strchr(addr->buf, '.')) == (char *)NULL) { - TRIVIA(("put_loopb_port - malformed loopback addr %s\n", - addr->buf)); - return; - } - - newlen = (int)((dot - addr->buf + 1) + strlen(port)); - if (newlen > addr->maxlen) { - newbuf = (char *)kmem_zalloc((size_t)newlen, KM_SLEEP); - (void) bcopy(addr->buf, newbuf, (size_t)addr->len); - kmem_free(addr->buf, (size_t)addr->maxlen); - addr->buf = newbuf; - addr->len = addr->maxlen = (uint_t)newlen; - dot = strchr(addr->buf, '.'); - } else { - addr->len = newlen; - } - - (void) strncpy(++dot, port, strlen(port)); - -} - -/* - * Make sure the given netbuf has a maxlen at least as big as the given - * length. - */ -static void -grow_netbuf(struct netbuf *nb, size_t length) -{ - char *newbuf; - - if (nb->maxlen >= length) - return; - - newbuf = kmem_zalloc(length, KM_SLEEP); - bcopy(nb->buf, newbuf, (size_t)nb->len); - kmem_free(nb->buf, (size_t)nb->maxlen); - nb->buf = newbuf; - nb->maxlen = (uint_t)length; -} - -/* - * Convert a loopback universal address to a loopback transport address. - */ -static void -loopb_u2t(const char *ua, struct netbuf *addr) -{ - size_t stringlen = strlen(ua) + 1; - const char *univp; /* ptr into universal addr */ - char *transp; /* ptr into transport addr */ - - /* Make sure the netbuf will be big enough. */ - if (addr->maxlen < stringlen) { - grow_netbuf(addr, stringlen); - } - - univp = ua; - transp = addr->buf; - while (*univp != NULL) { - if (*univp == '\\' && *(univp+1) == '\\') { - *transp = '\\'; - univp += 2; - } else if (*univp == '\\') { - /* octal character */ - *transp = (((*(univp+1) - '0') & 3) << 6) + - (((*(univp+2) - '0') & 7) << 3) + - ((*(univp+3) - '0') & 7); - univp += 4; - } else { - *transp = *univp; - univp++; - } - transp++; - } - - addr->len = (uint_t)(transp - addr->buf); - ASSERT(addr->len <= addr->maxlen); -} - - -/* - * xdr_md_pmap - * - * Taken from libnsl/rpc/pmap_prot.c - */ -bool_t -xdr_md_pmap(xdrs, regs) - XDR *xdrs; - struct pmap *regs; -{ - if (xdr_u_int(xdrs, ®s->pm_prog) && - xdr_u_int(xdrs, ®s->pm_vers) && - xdr_u_int(xdrs, ®s->pm_prot)) - return (xdr_u_int(xdrs, ®s->pm_port)); - return (FALSE); -} - -/* - * We need an version of CLNT_DESTROY which also frees the auth structure. - */ -static void -med_clnt_destroy(CLIENT **clp) -{ - if (*clp) { - if ((*clp)->cl_auth) { - AUTH_DESTROY((*clp)->cl_auth); - (*clp)->cl_auth = NULL; - } - CLNT_DESTROY(*clp); - *clp = NULL; - } -} - -/* - * Release this med_client entry. - * Do also destroy the entry if there was an error != EINTR, - * and mark the entry as not-valid, by setting time=0. - */ -static void -med_rel_client(struct med_client *medc, int error) -{ - TRIVIA(("rel_client - addr = (%p, %u %u)\n", - (void *) medc->addr.buf, medc->addr.len, medc->addr.maxlen)); - /*LINTED*/ - if (1 || error && error != EINTR) { - TRIVIA(("rel_client - destroying addr = (%p, %u %u)\n", - (void *) medc->addr.buf, medc->addr.len, - medc->addr.maxlen)); - med_clnt_destroy(&medc->client); - if (medc->addr.buf) { - kmem_free(medc->addr.buf, medc->addr.maxlen); - medc->addr.buf = NULL; - } - } -} - -/* - * Try to get the address for the desired service by using the old - * portmapper protocol. Ignores signals. - * - * Returns RPC_UNKNOWNPROTO if the request uses the loopback transport. - * Use med_get_rpcb_addr instead. - */ -static enum clnt_stat -med_get_pmap_addr( - struct knetconfig *kncfp, - rpcprog_t prog, - rpcvers_t vers, - struct netbuf *addr -) -{ - ushort_t port = 0; - int error; - enum clnt_stat status; - CLIENT *client = NULL; - struct pmap parms; - struct timeval tmo; - k_sigset_t oldmask; - k_sigset_t newmask; - - /* - * Call rpcbind version 2 or earlier (SunOS portmapper, remote - * only) to get an address we can use in an RPC client handle. - * We simply obtain a port no. for and plug it - * into `addr'. - */ - if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) { - med_put_inet_port(addr, htons(PMAPPORT)); - } else { - TRIVIA(("get_pmap_addr - unsupported protofmly %s\n", - kncfp->knc_protofmly)); - status = RPC_UNKNOWNPROTO; - goto out; - } - - TRIVIA(("get_pmap_addr - semantics=%u, protofmly=%s, proto=%s\n", - kncfp->knc_semantics, kncfp->knc_protofmly, kncfp->knc_proto)); - - /* - * Mask signals for the duration of the handle creation and - * RPC call. This allows relatively normal operation with a - * signal already posted to our thread. - * - * Any further exit paths from this routine must restore - * the original signal mask. - */ - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - - if ((error = clnt_tli_kcreate(kncfp, addr, PMAPPROG, PMAPVERS, - 0, 0, kcred, &client)) != RPC_SUCCESS) { - status = RPC_TLIERROR; - sigreplace(&oldmask, (k_sigset_t *)NULL); - MINUTE(("get_pmap_addr - kcreate() returned %d\n", error)); - goto out; - } - - if (!CLNT_CONTROL(client, CLSET_NODELAYONERR, (char *)&clset)) { - MINUTE(("get_pmap_addr - unable to set CLSET_NODELAYONERR\n")); - } - - client->cl_auth = authkern_create(); - - parms.pm_prog = prog; - parms.pm_vers = vers; - if (strcmp(kncfp->knc_proto, NC_TCP) == 0) { - parms.pm_prot = IPPROTO_TCP; - } else { - parms.pm_prot = IPPROTO_UDP; - } - parms.pm_port = 0; - tmo = md_med_pmap_timeout; - - if ((status = CLNT_CALL(client, PMAPPROC_GETPORT, - xdr_md_pmap, (char *)&parms, - xdr_u_short, (char *)&port, - tmo)) != RPC_SUCCESS) { - sigreplace(&oldmask, (k_sigset_t *)NULL); - MINUTE(("get_pmap_addr - CLNT_CALL(GETPORT) returned %d\n", - status)); - goto out; - } - - sigreplace(&oldmask, (k_sigset_t *)NULL); - - /* A zero value of port indicates a mapping failure */ - if (port == 0) { - status = RPC_PROGNOTREGISTERED; - MINUTE(("get_pmap_addr - program not registered\n")); - goto out; - } - - TRIVIA(("get_pmap_addr - port=%d\n", port)); - med_put_inet_port(addr, ntohs(port)); - -out: - if (client) - med_clnt_destroy(&client); - return (status); -} - -/* - * Try to get the address for the desired service by using the rpcbind - * protocol. Ignores signals. - */ -static enum clnt_stat -med_get_rpcb_addr( - struct knetconfig *kncfp, - rpcprog_t prog, - rpcvers_t vers, - struct netbuf *addr -) -{ - int error; - char *ua = NULL; - enum clnt_stat status; - RPCB parms; - struct timeval tmo; - CLIENT *client = NULL; - k_sigset_t oldmask; - k_sigset_t newmask; - ushort_t port; - - /* - * Call rpcbind (local or remote) to get an address we can use - * in an RPC client handle. - */ - tmo = md_med_pmap_timeout; - parms.r_prog = prog; - parms.r_vers = vers; - parms.r_addr = parms.r_owner = ""; - - if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) { - if (strcmp(kncfp->knc_proto, NC_TCP) == 0) { - parms.r_netid = "tcp"; - } else { - parms.r_netid = "udp"; - } - med_put_inet_port(addr, htons(PMAPPORT)); - } else if (strcmp(kncfp->knc_protofmly, NC_LOOPBACK) == 0) { - parms.r_netid = "ticlts"; - med_put_loopback_port(addr, "rpc"); - TRIVIA(( - "get_rpcb_addr - semantics=%s, protofmly=%s, proto=%s\n", - (kncfp->knc_semantics == NC_TPI_CLTS ? - "NC_TPI_CLTS" : "?"), - kncfp->knc_protofmly, kncfp->knc_proto)); - } else { - TRIVIA(("get_rpcb_addr - unsupported protofmly %s\n", - kncfp->knc_protofmly)); - status = RPC_UNKNOWNPROTO; - goto out; - } - - /* - * Mask signals for the duration of the handle creation and - * RPC calls. This allows relatively normal operation with a - * signal already posted to our thread. - * - * Any further exit paths from this routine must restore - * the original signal mask. - */ - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - - if ((error = clnt_tli_kcreate(kncfp, addr, RPCBPROG, RPCBVERS, - 0, 0, kcred, &client)) != 0) { - status = RPC_TLIERROR; - sigreplace(&oldmask, (k_sigset_t *)NULL); - MINUTE(("get_rpcb_addr - kcreate() returned %d\n", error)); - goto out; - } - - if (!CLNT_CONTROL(client, CLSET_NODELAYONERR, (char *)&clset)) { - MINUTE(("get_rpcb_addr - unable to set CLSET_NODELAYONERR\n")); - } - - client->cl_auth = authkern_create(); - - if ((status = CLNT_CALL(client, RPCBPROC_GETADDR, - xdr_rpcb, (char *)&parms, xdr_wrapstring, (char *)&ua, - tmo)) != RPC_SUCCESS) { - sigreplace(&oldmask, (k_sigset_t *)NULL); - MINUTE(("get_rpcb_addr - CLNT_CALL(GETADDR) returned %d\n", - status)); - goto out; - } - - sigreplace(&oldmask, (k_sigset_t *)NULL); - - if (ua == NULL || *ua == NULL) { - status = RPC_PROGNOTREGISTERED; - MINUTE(("get_rpcb_addr - program not registered\n")); - goto out; - } - - /* - * Convert the universal address to the transport address. - * Theoretically, we should call the local rpcbind to translate - * from the universal address to the transport address, but it gets - * complicated (e.g., there's no direct way to tell rpcbind that we - * want an IP address instead of a loopback address). Note that - * the transport address is potentially host-specific, so we can't - * just ask the remote rpcbind, because it might give us the wrong - * answer. - */ - if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) { - port = rpc_uaddr2port(AF_INET, ua); - med_put_inet_port(addr, ntohs(port)); - } else if (strcmp(kncfp->knc_protofmly, NC_LOOPBACK) == 0) { - loopb_u2t(ua, addr); - } else { - /* "can't happen" - should have been checked for above */ - cmn_err(CE_PANIC, "med_get_rpcb_addr: bad protocol family"); - } - -out: - if (client != NULL) - med_clnt_destroy(&client); - if (ua != NULL) - xdr_free(xdr_wrapstring, (char *)&ua); - return (status); -} - -/* - * Get the RPC client handle to talk to the service at addrp. - * Returns: - * RPC_SUCCESS Success. - * RPC_RPCBFAILURE Couldn't talk to the remote portmapper (e.g., - * timeouts). - * RPC_INTR Caught a signal before we could successfully return. - * RPC_TLIERROR Couldn't initialize the handle after talking to the - * remote portmapper (shouldn't happen). - */ -static enum clnt_stat -med_get_rpc_handle( - struct knetconfig *kncfp, - struct netbuf *addrp, - rpcprog_t prog, - rpcvers_t vers, - CLIENT **clientp -) -{ - enum clnt_stat status; - k_sigset_t oldmask; - k_sigset_t newmask; - int error; - - /* - * Try to get the address from either portmapper or rpcbind. - * We check for posted signals after trying and failing to - * contact the portmapper since it can take uncomfortably - * long for this entire procedure to time out. - */ - BSTAMP - status = med_get_pmap_addr(kncfp, prog, vers, addrp); - if (MED_IS_UNRECOVERABLE_RPC(status) && status != RPC_UNKNOWNPROTO && - ! PMAP_WRONG_VERSION(status)) { - status = RPC_RPCBFAILURE; - goto bailout; - } - - if (status == RPC_SUCCESS) - ESTAMP("done OK med_get_pmap_addr") - else - ESTAMP("done Not OK med_get_pmap_addr") - - if (status != RPC_SUCCESS) { - BSTAMP - status = med_get_rpcb_addr(kncfp, prog, vers, addrp); - if (status != RPC_SUCCESS) { - ESTAMP("done Not OK med_get_rpcb_addr") - MINOR(( - "get_rpc_handle - can't contact portmapper or rpcbind\n")); - status = RPC_RPCBFAILURE; - goto bailout; - } - } - ESTAMP("done OK med_get_rpcb_addr") - - med_clnt_destroy(clientp); - - /* - * Mask signals for the duration of the handle creation, - * allowing relatively normal operation with a signal - * already posted to our thread. - * - * Any further exit paths from this routine must restore - * the original signal mask. - */ - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - - if ((error = clnt_tli_kcreate(kncfp, addrp, prog, vers, - 0, 0, kcred, clientp)) != 0) { - status = RPC_TLIERROR; - sigreplace(&oldmask, (k_sigset_t *)NULL); - MINUTE(("get_rpc_handle - kcreate(prog) returned %d\n", error)); - goto bailout; - } - - if (!CLNT_CONTROL(*clientp, CLSET_NODELAYONERR, (char *)&clset)) { - MINUTE(("get_rpc_handle - unable to set CLSET_NODELAYONERR\n")); - } - - (*clientp)->cl_auth = authkern_create(); - - sigreplace(&oldmask, (k_sigset_t *)NULL); - -bailout: - return (status); -} - -/* - * Return a med_client to the . - * The med_client found is marked as in_use. - * It is the responsibility of the caller to release the med_client by - * calling med_rel_client(). - * - * Returns: - * RPC_SUCCESS Success. - * RPC_CANTSEND Temporarily cannot send. - * RPC_TLIERROR Unspecified TLI error. - * RPC_UNKNOWNPROTO kncfp is from an unrecognised protocol family. - * RPC_PROGNOTREGISTERED The prog `prog' isn't registered on the server. - * RPC_RPCBFAILURE Couldn't contact portmapper on remote host. - * Any unsuccessful return codes from CLNT_CALL(). - */ -static enum clnt_stat -med_get_client( - struct knetconfig *kncfp, - struct netbuf *addrp, - rpcprog_t prog, - rpcvers_t vers, - struct med_client **mcp -) -{ - struct med_client *med_clnt = NULL; - enum clnt_stat status = RPC_SUCCESS; - - mutex_enter(&med_lck); - - /* - * Create an med_client - */ - med_clnt = kmem_zalloc(sizeof (*med_clnt), KM_SLEEP); - med_clnt->client = NULL; - med_clnt->prog = prog; - med_clnt->vers = vers; - med_clnt->addr.buf = med_dup(addrp->buf, addrp->maxlen); - med_clnt->addr.len = addrp->len; - med_clnt->addr.maxlen = addrp->maxlen; - - mutex_exit(&med_lck); - - status = med_get_rpc_handle(kncfp, &med_clnt->addr, prog, vers, - &med_clnt->client); - -out: - TRIVIA(("get_client - End: med_clnt=%p status=%d, client=%p\n", - (void *)med_clnt, status, - (med_clnt ? med_clnt->client : (void *) -1L))); - - if (status == RPC_SUCCESS) { - *mcp = med_clnt; - } else { - /* Cleanup */ - if (med_clnt) { - mutex_enter(&med_lck); - med_rel_client(med_clnt, EINVAL); - kmem_free(med_clnt, sizeof (*med_clnt)); - mutex_exit(&med_lck); - } - *mcp = NULL; - } - - return (status); -} - -/* - * Make an RPC call to addr via config. - * - * Returns: - * 0 Success. - * EIO Couldn't get client handle, timed out, or got unexpected - * RPC status within md_med_protocol_retry attempts. - * EINVAL Unrecoverable error in RPC call. Causes client handle - * to be destroyed. - * EINTR RPC call was interrupted within md_med_protocol_retry attempts. - */ -static int -med_callrpc( - struct knetconfig *kncfp, - struct netbuf *addrp, - rpcprog_t prog, - rpcvers_t vers, - rpcproc_t proc, - xdrproc_t inproc, - caddr_t in, - xdrproc_t outproc, - caddr_t out, - struct timeval *timout -) -{ - struct med_client *med_clnt = NULL; - enum clnt_stat cl_stat; - int tries = md_med_protocol_retry; - int error; - k_sigset_t oldmask; - k_sigset_t newmask; - - MINUTE(("med_callrpc - Calling [%u, %u, %u]\n", prog, vers, proc)); - - sigfillset(&newmask); - - while (tries--) { - error = 0; - cl_stat = med_get_client(kncfp, addrp, prog, vers, &med_clnt); - if (MED_IS_UNRECOVERABLE_RPC(cl_stat)) { - error = EINVAL; - goto rel_client; - } else if (cl_stat != RPC_SUCCESS) { - error = EIO; - continue; - } - - ASSERT(med_clnt != NULL); - ASSERT(med_clnt->client != NULL); - - sigreplace(&newmask, &oldmask); - cl_stat = CLNT_CALL(med_clnt->client, proc, inproc, in, - outproc, out, *timout); - sigreplace(&oldmask, (k_sigset_t *)NULL); - - switch (cl_stat) { - case RPC_SUCCESS: - /* - * Update the timestamp on the client cache entry. - */ - error = 0; - break; - - case RPC_TIMEDOUT: - MINOR(("med_callrpc - RPC_TIMEDOUT\n")); - if (timout == 0) { - /* - * We will always time out when timout == 0. - */ - error = 0; - break; - } - /* FALLTHROUGH */ - case RPC_CANTSEND: - case RPC_XPRTFAILED: - default: - if (MED_IS_UNRECOVERABLE_RPC(cl_stat)) { - error = EINVAL; - } else { - error = EIO; - } - } - -rel_client: - MINOR(("med_callrpc - RPC cl_stat=%d error=%d\n", - cl_stat, error)); - if (med_clnt != NULL) { - med_rel_client(med_clnt, error); - kmem_free(med_clnt, sizeof (*med_clnt)); - } - - /* - * If EIO, loop else we're done. - */ - if (error != EIO) { - break; - } - } - - MINUTE(("med_callrpc - End: error=%d, tries=%d\n", error, tries)); - - return (error); -} - -/* - * Try various transports to get the rpc call through. - */ -static int -med_net_callrpc( - char *h_nm, - in_addr_t h_ip, - uint_t h_flags, - rpcprog_t prog, - rpcvers_t vers, - rpcproc_t proc, - xdrproc_t inproc, - caddr_t in, - xdrproc_t outproc, - caddr_t out, - struct timeval *timout -) -{ - int err; - struct med_addr *uap; - int uapi; - struct netbuf dst; - int done = 0; - - ASSERT(h_nm != NULL); - ASSERT(h_ip != 0); - - /* - * Loop through our table of transports and try to get the data out. - */ - for (uapi = 0; uapi < med_addr_tab_nents && ! done; uapi++) { - - /* Shorthand */ - uap = &med_addr_tab[uapi]; - - /* - * UAFLG_SKIP is used for debugging and by the protocol - * selection code. - */ - if (uap->ua_flags & UAFLG_SKIP) { - MINUTE(("med_net_callrpc - %s - marked \"skip\"\n", - uap->ua_netid)); - continue; - } - - /* - * If we are not talking to this host, we can skip all LOOPBACK - * transport options. - */ - if (! (h_flags & NMIP_F_LOCAL) && - (uap->ua_flags & UAFLG_LOOPBACK)) - continue; - - if (uap->ua_flags & UAFLG_ERROR) - continue; - - if (uap->ua_flags & UAFLG_RPCERROR) - continue; - - /* Unknown protocol, skip it */ - if (! uap->ua_kn.knc_protofmly) { - MINUTE(("med_net_callrpc - bad protofmly\n")); - continue; - } - - if (strcmp(uap->ua_kn.knc_protofmly, NC_LOOPBACK) == 0) { - /* - * strlen("localhost.") is 10 - */ - dst.len = dst.maxlen = 10; - dst.buf = kmem_alloc(dst.len, KM_SLEEP); - (void) strncpy(dst.buf, "localhost.", dst.len); - } else if (strcmp(uap->ua_kn.knc_protofmly, NC_INET) == 0) { - struct sockaddr_in *s; - - /* - * If we have not allocated a buffer for an INET addrs - * or the buffer allocated will not contain an INET - * addr, allocate or re-allocate. - */ - dst.buf = kmem_zalloc(sizeof (struct sockaddr_in), - KM_SLEEP); - dst.maxlen = sizeof (struct sockaddr_in); - - /* Short hand */ - /*LINTED*/ - s = (struct sockaddr_in *)dst.buf; - - /* Initialize the socket */ - if (uap->ua_flags & UAFLG_LOOPBACK) - s->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - else - s->sin_addr.s_addr = h_ip; - s->sin_port = 0; - s->sin_family = AF_INET; - } - - dst.len = dst.maxlen; - - MINOR(("med_net_callrpc - Trying %s\n", uap->ua_netid)); - - err = med_callrpc(&uap->ua_kn, &dst, prog, vers, proc, inproc, - in, outproc, out, timout); - - if (dst.buf) { - kmem_free(dst.buf, dst.maxlen); - dst.buf = NULL; - dst.len = 0; - dst.maxlen = 0; - } - - if (err) { - MINUTE(("med_net_callrpc - %s failed\n\n", - uap->ua_netid)); - continue; - } - - MINUTE(("med_net_callrpc - %s OK\n\n", uap->ua_netid)); - done = 1; - } - - /* - * Print a message if we could not reach a host. - */ - if (! done) { - cmn_err(CE_WARN, "%s on host %s not responding", MED_SERVNAME, - h_nm); - return (1); - } - - return (0); -} - -/* - * Validate the mediator data - */ -static int -med_ok(set_t setno, med_data_t *meddp) -{ - /* Not initialized, or not a mediator data record */ - if (meddp->med_dat_mag != MED_DATA_MAGIC) - goto fail; - - MINUTE(("Magic OK\n")); - - /* Mismatch in revisions */ - if (meddp->med_dat_rev != MED_DATA_REV) - goto fail; - - MINUTE(("Revision OK\n")); - - /* Not for the right set, this is paranoid */ - if (setno != meddp->med_dat_sn) - goto fail; - - MINUTE(("Setno OK\n")); - - /* The record checksum is not correct */ - if (crcchk(meddp, &meddp->med_dat_cks, sizeof (med_data_t), NULL)) - goto fail; - - MINUTE(("Mediator validated\n")); - - return (1); - -fail: - return (0); -} - -static void -med_adl(med_data_lst_t **meddlpp, med_data_t *meddp) -{ - /* - * Run to the end of the list - */ - for (/* void */; (*meddlpp != NULL); meddlpp = &(*meddlpp)->mdl_nx) - /* void */; - - *meddlpp = (med_data_lst_t *)kmem_zalloc(sizeof (med_data_lst_t), - KM_SLEEP); - - (*meddlpp)->mdl_med = (med_data_t *)med_dup(meddp, sizeof (med_data_t)); -} - -static void -mtaa_upd_init(med_thr_a_args_t *mtaap, med_thr_h_args_t *mthap) -{ - med_upd_data_args_t *argsp; - med_err_t *resp; - - argsp = kmem_zalloc(sizeof (med_upd_data_args_t), KM_SLEEP); - argsp->med.med_setno = mthap->mtha_setno; - if (MD_MNSET_SETNO(argsp->med.med_setno)) { - /* - * In MN diskset, use a generic nodename, multiowner, in the - * mediator record which allows any node to access mediator - * information. MN diskset reconfig cycle forces consistent - * view of set/node/drive/mediator information across all nodes - * in the MN diskset. This allows the relaxation of - * node name checking in rpc.metamedd for MN disksets. - */ - argsp->med.med_caller = md_strdup(MED_MN_CALLER); - } else { - argsp->med.med_caller = md_strdup(utsname.nodename); - } - argsp->med.med_setname = md_strdup(mthap->mtha_setname); - argsp->med_data = *mthap->mtha_meddp; - - resp = kmem_zalloc(sizeof (med_err_t), KM_SLEEP); - - mtaap->mtaa_mag = MTAA_MAGIC; - mtaap->mtaa_mthap = mthap; - mtaap->mtaa_prog = MED_PROG; - mtaap->mtaa_vers = MED_VERS; - mtaap->mtaa_proc = MED_UPD_DATA; - mtaap->mtaa_inproc = xdr_med_upd_data_args_t; - mtaap->mtaa_in = (caddr_t)argsp; - mtaap->mtaa_outproc = xdr_med_err_t; - mtaap->mtaa_out = (caddr_t)resp; - mtaap->mtaa_timout = (struct timeval *)&md_med_def_timeout; -} - -static void -mtaa_upd_free(med_thr_a_args_t *mtaap) -{ - med_upd_data_args_t *argsp = (med_upd_data_args_t *)mtaap->mtaa_in; - med_err_t *resp = (med_err_t *)mtaap->mtaa_out; - - freestr(argsp->med.med_caller); - freestr(argsp->med.med_setname); - kmem_free(argsp, sizeof (med_upd_data_args_t)); - - if (mtaap->mtaa_flags & MDT_A_OK) - xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out); - - kmem_free(resp, sizeof (med_err_t)); -} - -static int -mtaa_upd_err(med_thr_a_args_t *mtaap) -{ - /*LINTED*/ - med_err_t *resp = (med_err_t *)mtaap->mtaa_out; - - if (resp->med_errno == MDE_MED_NOERROR) { - MAJOR(("upd_med_hosts - %s - OK\n\n", mtaap->mtaa_h_nm)); - return (0); - } else { - MAJOR(("upd_med_hosts - %s - errno=%d\n\n", mtaap->mtaa_h_nm, - resp->med_errno)); - return (1); - } -} - -static void -mtaa_get_init(med_thr_a_args_t *mtaap, med_thr_h_args_t *mthap) -{ - med_args_t *argsp; - med_get_data_res_t *resp; - - argsp = kmem_zalloc(sizeof (med_args_t), KM_SLEEP); - argsp->med.med_setno = mthap->mtha_setno; - if (MD_MNSET_SETNO(argsp->med.med_setno)) { - /* - * In MN diskset, use a generic nodename, multiowner, in the - * mediator record which allows any node to access mediator - * information. MN diskset reconfig cycle forces consistent - * view of set/node/drive/mediator information across all nodes - * in the MN diskset. This allows the relaxation of - * node name checking in rpc.metamedd for MN disksets. - */ - argsp->med.med_caller = md_strdup(MED_MN_CALLER); - } else { - argsp->med.med_caller = md_strdup(utsname.nodename); - } - - argsp->med.med_setname = md_strdup(mthap->mtha_setname); - - resp = kmem_zalloc(sizeof (med_get_data_res_t), KM_SLEEP); - - mtaap->mtaa_mag = MTAA_MAGIC; - mtaap->mtaa_mthap = mthap; - mtaap->mtaa_prog = MED_PROG; - mtaap->mtaa_vers = MED_VERS; - mtaap->mtaa_proc = MED_GET_DATA; - mtaap->mtaa_inproc = xdr_med_args_t; - mtaap->mtaa_in = (caddr_t)argsp; - mtaap->mtaa_outproc = xdr_med_get_data_res_t; - mtaap->mtaa_out = (caddr_t)resp; - mtaap->mtaa_timout = (struct timeval *)&md_med_def_timeout; -} - -static void -mtaa_get_free(med_thr_a_args_t *mtaap) -{ - /*LINTED*/ - med_args_t *argsp = (med_args_t *)mtaap->mtaa_in; - /*LINTED*/ - med_get_data_res_t *resp = (med_get_data_res_t *)mtaap->mtaa_out; - - freestr(argsp->med.med_caller); - freestr(argsp->med.med_setname); - kmem_free(argsp, sizeof (med_args_t)); - - if (mtaap->mtaa_flags & MDT_A_OK) - xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out); - - kmem_free(resp, sizeof (med_get_data_res_t)); -} - -static int -mtaa_get_err(med_thr_a_args_t *mtaap) -{ - /*LINTED*/ - med_get_data_res_t *resp = (med_get_data_res_t *)mtaap->mtaa_out; - - if (resp->med_status.med_errno == MDE_MED_NOERROR) { - MAJOR(("get_med_host_data - %s - OK\n\n", mtaap->mtaa_h_nm)); - return (0); - } else { - MAJOR(("get_med_host_data - %s - errno=%d\n\n", - mtaap->mtaa_h_nm, resp->med_status.med_errno)); - return (1); - } -} - -static void -mtha_init( - med_thr_t *mtp, - med_thr_h_args_t *mthap, - md_hi_t *mhp, - char *setname, - med_data_t *meddp, - set_t setno, - void (*mtaa_init_func)(med_thr_a_args_t *, - med_thr_h_args_t *), - int (*mtaa_err_func)(med_thr_a_args_t *) -) -{ - int j; - - mthap->mtha_mag = MTHA_MAGIC; - mthap->mtha_mtp = mtp; - mthap->mtha_mhp = mhp; - mthap->mtha_setname = md_strdup(setname); - if (meddp) - mthap->mtha_meddp = meddp; - else - mthap->mtha_meddp = NULL; - mthap->mtha_setno = setno; - mthap->mtha_a_cnt = mhp->a_cnt; - mthap->mtha_a_nthr = 0; - - mutex_init(&mthap->mtha_a_mx, NULL, MUTEX_DEFAULT, - NULL); - cv_init(&mthap->mtha_a_cv, NULL, CV_DEFAULT, NULL); - - j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1; - for (; j >= 0; j--) { - (*mtaa_init_func)(&mthap->mtha_a_args[j], mthap); - mthap->mtha_a_args[j].mtaa_h_nm = mhp->a_nm[j]; - mthap->mtha_a_args[j].mtaa_h_ip = mhp->a_ip[j]; - mthap->mtha_a_args[j].mtaa_h_flags = mhp->a_flg; - mthap->mtha_a_args[j].mtaa_err_func = mtaa_err_func; - } -} - -static void -mtha_free( - med_thr_h_args_t *mthap, - void (*mtaa_free_func)(med_thr_a_args_t *) -) -{ - int j; - - freestr(mthap->mtha_setname); - - j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1; - for (; j >= 0; j--) - (*mtaa_free_func)(&mthap->mtha_a_args[j]); - - mutex_destroy(&mthap->mtha_a_mx); - cv_destroy(&mthap->mtha_a_cv); -} - -static void -med_a_thr(med_thr_a_args_t *mtaap) -{ - callb_cpr_t cprinfo; - - /* - * Register cpr callback - */ - CALLB_CPR_INIT(&cprinfo, &mtaap->mtaa_mthap->mtha_a_mx, - callb_generic_cpr, "med_a_thr"); - - mutex_enter(&mtaap->mtaa_mthap->mtha_a_mx); - if (mtaap->mtaa_mthap->mtha_flags & MDT_H_OK) - goto done; - - mutex_exit(&mtaap->mtaa_mthap->mtha_a_mx); - - mtaap->mtaa_err = med_net_callrpc( - mtaap->mtaa_h_nm, mtaap->mtaa_h_ip, mtaap->mtaa_h_flags, - mtaap->mtaa_prog, mtaap->mtaa_vers, mtaap->mtaa_proc, - mtaap->mtaa_inproc, mtaap->mtaa_in, - mtaap->mtaa_outproc, mtaap->mtaa_out, - mtaap->mtaa_timout); - - mutex_enter(&mtaap->mtaa_mthap->mtha_a_mx); - - if (mtaap->mtaa_err) { - MAJOR(("med_net_callrpc(%u, %u, %u) - %s - failed\n\n", - mtaap->mtaa_prog, mtaap->mtaa_vers, mtaap->mtaa_proc, - mtaap->mtaa_h_nm)); - xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out); - } else { - if ((*mtaap->mtaa_err_func)(mtaap) == 0) { - if (! (mtaap->mtaa_mthap->mtha_flags & MDT_H_OK)) { - mtaap->mtaa_mthap->mtha_flags |= MDT_H_OK; - mtaap->mtaa_flags |= MDT_A_OK; - } else - xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out); - } else - xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out); - } - -done: - mtaap->mtaa_mthap->mtha_a_nthr--; - cv_signal(&mtaap->mtaa_mthap->mtha_a_cv); - - /* - * CALLB_CPR_EXIT will do mutex_exit(&mtaap->mtaa_mthap->mtha_a_mx) - */ - CALLB_CPR_EXIT(&cprinfo); - thread_exit(); -} - -static void -med_h_thr(med_thr_h_args_t *mthap) -{ - int j; - callb_cpr_t cprinfo; - - /* - * Register cpr callback - */ - CALLB_CPR_INIT(&cprinfo, &mthap->mtha_mtp->mt_mx, callb_generic_cpr, - "med_a_thr"); - /* - * Lock mthap->mtha_mtp->mt_mx is held early to avoid releasing the - * locks out of order. - */ - mutex_enter(&mthap->mtha_mtp->mt_mx); - mutex_enter(&mthap->mtha_a_mx); - - j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1; - for (; j >= 0; j--) { - (void) thread_create(NULL, 0, med_a_thr, - &mthap->mtha_a_args[j], 0, &p0, TS_RUN, minclsyspri); - mthap->mtha_a_nthr++; - } - - /* - * cpr safe to suspend while waiting for other threads - */ - CALLB_CPR_SAFE_BEGIN(&cprinfo); - while (mthap->mtha_a_nthr > 0) - cv_wait(&mthap->mtha_a_cv, &mthap->mtha_a_mx); - mutex_exit(&mthap->mtha_a_mx); - CALLB_CPR_SAFE_END(&cprinfo, &mthap->mtha_mtp->mt_mx); - - - mthap->mtha_mtp->mt_nthr--; - cv_signal(&mthap->mtha_mtp->mt_cv); - - /* - * set up cpr exit - * CALLB_CPR_EXIT will do mutex_exit(&mtaap->mta_mtp->mt_mx) - */ - CALLB_CPR_EXIT(&cprinfo); - thread_exit(); -} - -static med_get_data_res_t * -mtaa_get_resp(med_thr_h_args_t *mthap) -{ - med_thr_a_args_t *mtaap; - int j; - - j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1; - for (; j >= 0; j--) { - mtaap = &mthap->mtha_a_args[j]; - if (mtaap->mtaa_flags & MDT_A_OK) - /*LINTED*/ - return ((med_get_data_res_t *)mtaap->mtaa_out); - } - return ((med_get_data_res_t *)NULL); -} - -/* - * Public Functions - */ - -/* - * initializes med structs, locks, etc - */ -void -med_init(void) -{ - int uapi; - - TRIVIA(("[med_init")); - - for (uapi = 0; uapi < med_addr_tab_nents; uapi++) { - struct med_addr *uap = &med_addr_tab[uapi]; - - /* If the protocol is skipped, the mutex is not needed either */ - if (md_med_trans_lst != NULL && - strstr(md_med_trans_lst, uap->ua_kn.knc_proto) == NULL && - strstr(md_med_trans_lst, uap->ua_netid) == NULL) { - uap->ua_flags |= UAFLG_SKIP; - continue; - } - - mutex_init(&uap->ua_mutex, NULL, MUTEX_DEFAULT, NULL); - uap->ua_flags |= UAFLG_LOCKINIT; - bzero((caddr_t)&uap->ua_kn.knc_unused, - sizeof (uap->ua_kn.knc_unused)); - } - - TRIVIA(("]\n")); -} - -/* - * free any med structs, locks, etc - */ -void -med_fini(void) -{ - int uapi; - - TRIVIA(("[med_fini")); - - for (uapi = 0; uapi < med_addr_tab_nents; uapi++) { - struct med_addr *uap = &med_addr_tab[uapi]; - - if (uap->ua_flags & UAFLG_LOCKINIT) { - mutex_destroy(&uap->ua_mutex); - uap->ua_flags &= ~UAFLG_LOCKINIT; - } - } - - TRIVIA(("]\n")); -} - -/* - * Update all the mediators - */ -int -upd_med_hosts( - md_hi_arr_t *mp, - char *setname, - med_data_t *meddp, - char *caller -) -{ - med_thr_t *mtp; - med_thr_h_args_t *mthap; - int i; - int medok = 0; - - MAJOR(("upd_med_hosts - called from <%s>\n", NULLSTR(caller))); - - /* No mediators, were done */ - if (mp->n_cnt == 0) - return (0); - - mtp = kmem_zalloc(sizeof (med_thr_t), KM_SLEEP); - ASSERT(mtp != NULL); - - mutex_init(&mtp->mt_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&mtp->mt_cv, NULL, CV_DEFAULT, NULL); - mtp->mt_mag = MTH_MAGIC; - - mutex_enter(&mtp->mt_mx); - - mtp->mt_nthr = 0; - - /* Loop through our list of mediator hosts, start a thread per host */ - for (i = 0; i < md_nmedh; i++) { - - if (mp->n_lst[i].a_cnt == 0) - continue; - - mtp->mt_h_args[i] = kmem_zalloc(sizeof (med_thr_h_args_t), - KM_SLEEP); - mthap = mtp->mt_h_args[i]; - ASSERT(mthap != NULL); - mtha_init(mtp, mthap, &mp->n_lst[i], setname, meddp, - meddp->med_dat_sn, mtaa_upd_init, mtaa_upd_err); - - MAJOR(("upd_med_hosts - updating %s\n", - NULLSTR(mp->n_lst[i].a_nm[0]))); - - (void) thread_create(NULL, 0, med_h_thr, mthap, 0, &p0, - TS_RUN, minclsyspri); - - mtp->mt_nthr++; - } - - while (mtp->mt_nthr > 0) - cv_wait(&mtp->mt_cv, &mtp->mt_mx); - - mutex_exit(&mtp->mt_mx); - - for (i = 0; i < md_nmedh; i++) { - mthap = mtp->mt_h_args[i]; - if (mthap != NULL) { - if (mthap->mtha_flags & MDT_H_OK) - medok++; - mtha_free(mthap, mtaa_upd_free); - kmem_free(mthap, sizeof (med_thr_h_args_t)); - } - } - - mutex_destroy(&mtp->mt_mx); - cv_destroy(&mtp->mt_cv); - - kmem_free(mtp, sizeof (med_thr_t)); - - return (medok); -} - -/* - * Get the mediator data. - */ -med_data_lst_t * -get_med_host_data( - md_hi_arr_t *mp, - char *setname, - set_t setno -) -{ - med_thr_t *mtp; - med_thr_h_args_t *mthap; - med_get_data_res_t *resp; - med_data_lst_t *retval = NULL; - int i; - - /* No mediators, were done */ - if (mp->n_cnt == 0) - return (NULL); - - mtp = kmem_zalloc(sizeof (med_thr_t), KM_SLEEP); - ASSERT(mtp != NULL); - - mutex_init(&mtp->mt_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&mtp->mt_cv, NULL, CV_DEFAULT, NULL); - - mutex_enter(&mtp->mt_mx); - - mtp->mt_nthr = 0; - - /* Loop through our list of mediator hosts, start a thread per host */ - for (i = 0; i < md_nmedh; i++) { - - if (mp->n_lst[i].a_cnt == 0) - continue; - - mtp->mt_h_args[i] = kmem_zalloc(sizeof (med_thr_h_args_t), - KM_SLEEP); - mthap = mtp->mt_h_args[i]; - ASSERT(mthap != NULL); - mtha_init(mtp, mthap, &mp->n_lst[i], setname, NULL, setno, - mtaa_get_init, mtaa_get_err); - - MAJOR(("get_med_host_data from %s\n", - NULLSTR(mp->n_lst[i].a_nm[0]))); - - (void) thread_create(NULL, 0, med_h_thr, mthap, 0, &p0, - TS_RUN, minclsyspri); - - mtp->mt_nthr++; - } - - while (mtp->mt_nthr > 0) - cv_wait(&mtp->mt_cv, &mtp->mt_mx); - - mutex_exit(&mtp->mt_mx); - - for (i = 0; i < md_nmedh; i++) { - mthap = mtp->mt_h_args[i]; - if (mthap != NULL) { - if (mthap->mtha_flags & MDT_H_OK) { - resp = mtaa_get_resp(mthap); - ASSERT(resp != NULL); - - if (med_ok(setno, &resp->med_data)) - med_adl(&retval, &resp->med_data); - } - mtha_free(mthap, mtaa_get_free); - kmem_free(mthap, sizeof (med_thr_h_args_t)); - } - } - - mutex_destroy(&mtp->mt_mx); - cv_destroy(&mtp->mt_cv); - - kmem_free(mtp, sizeof (med_thr_t)); - - return (retval); -} - -int -med_get_t_size_ioctl(mddb_med_t_parm_t *tpp, int mode) -{ - md_error_t *ep = &tpp->med_tp_mde; - - mdclrerror(ep); - - if ((mode & FREAD) == 0) - return (mdsyserror(ep, EACCES)); - - tpp->med_tp_nents = med_addr_tab_nents; - tpp->med_tp_setup = md_med_transdevs_set; - - return (0); -} - -int -med_get_t_ioctl(mddb_med_t_parm_t *tpp, int mode) -{ - md_error_t *ep = &tpp->med_tp_mde; - int uapi = 0; - - mdclrerror(ep); - - if ((mode & FREAD) == 0) - return (mdsyserror(ep, EACCES)); - - for (uapi = 0; uapi < med_addr_tab_nents; uapi++) { - struct med_addr *uap = &med_addr_tab[uapi]; - - (void) strncpy(tpp->med_tp_ents[uapi].med_te_nm, - uap->ua_devname, MED_TE_NM_LEN); - tpp->med_tp_ents[uapi].med_te_dev = - (md_dev64_t)uap->ua_kn.knc_rdev; - } - - tpp->med_tp_nents = med_addr_tab_nents; - - return (0); -} - -int -med_set_t_ioctl(mddb_med_t_parm_t *tpp, int mode) -{ - md_error_t *ep = &tpp->med_tp_mde; - int uapi = 0; - - mdclrerror(ep); - - if ((mode & FWRITE) == 0) - return (mdsyserror(ep, EACCES)); - - for (uapi = 0; uapi < med_addr_tab_nents; uapi++) { - struct med_addr *uap = &med_addr_tab[uapi]; - - mutex_enter(&uap->ua_mutex); - uap->ua_kn.knc_rdev = md_dev64_to_dev( - tpp->med_tp_ents[uapi].med_te_dev); - mutex_exit(&uap->ua_mutex); - } - - md_med_transdevs_set = 1; - - return (0); -} diff --git a/usr/src/uts/common/io/lvm/md/md_names.c b/usr/src/uts/common/io/lvm/md/md_names.c deleted file mode 100644 index 8a716e5730f4..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_names.c +++ /dev/null @@ -1,4371 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include -#include -#include -#include -#include - -#define MDDB -#include -#include -#include -#include - -extern md_set_t md_set[]; -extern int *md_nm_snarfed; -void *lookup_entry(struct nm_next_hdr *, set_t, - side_t, mdkey_t, md_dev64_t, int); -void *lookup_shared_entry(struct nm_next_hdr *, - mdkey_t, char *, mddb_recid_t *, int); -static void add_to_devid_list(ddi_devid_t did); -static int devid_is_unique(ddi_devid_t did); -static size_t free_devid_list(int *count); -void md_devid_cleanup(set_t, uint_t); -extern md_krwlock_t nm_lock; - -typedef enum lookup_dev_result { - LOOKUP_DEV_FOUND, /* Found a good record. */ - LOOKUP_DEV_NOMATCH, /* No matching record in DB. */ - LOOKUP_DEV_CONFLICT /* Name conflicts with existing record. */ -} lookup_dev_result_t; - -/* List of SVM module names. */ -static char *meta_names[] = { - "md", - MD_STRIPE, - MD_MIRROR, - MD_TRANS, - MD_HOTSPARES, - MD_RAID, - MD_VERIFY, - MD_SP, - MD_NOTIFY -}; - -#define META_NAME_COUNT (sizeof (meta_names) / sizeof (char *)) - -/* - * Used in translating from the md major name on miniroot to - * md major name on target system. This is only needed during - * upgrade. - */ - -extern major_t md_major, md_major_targ; - -/* - * During upgrade, SVM basically runs with the devt from the target - * being upgraded. Translations are made from the miniroot devt to/from the - * target devt when the devt is to be stored in the SVM metadriver's - * unit structures. - * - * The following routines return a translated (aka miniroot) devt: - * - md_getdevnum - * - the metadriver's get_devs routines (stripe_getdevs, etc.) - * - * By the same token, the major number and major name conversion operations - * need to use the name_to_major file from the target system instead - * of the name_to_major file on the miniroot. So, calls to - * ddi_name_to_major must be replaced with calls to md_targ_name_to_major - * when running on an upgrade. Same is true with calls to - * ddi_major_to_name. - */ - -static mdkey_t -create_key(struct nm_next_hdr *nh) -{ - mdkey_t retval; - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - - retval = rh->r_next_key; - /* increment the next_key, keeps them unique */ - rh->r_next_key++; - - return (retval); -} - -static int -unused_key(struct nm_next_hdr *nh, int shared, mdkey_t key) -{ - mdkey_t min_value; - int nmspace; - - if (shared & NM_DEVID) { - min_value = 1; - nmspace = NM_DEVID; - } else { - min_value = ((shared & NM_SHARED) ? MDDB_FIRST_MODID : 1); - nmspace = 0; - } - - /* Just say no if the key passed in is less than the initial */ - if (key < min_value) - return (0); - - if ((shared & NM_SHARED) && (lookup_shared_entry(nh, key, (char *)0, - NULL, nmspace) != NULL)) - return (0); - - /* - * The set num in lookup_entry is not used in this case - * we dont keep track of the nonshared in the devid nmspace - */ - if (!(shared & NM_NOTSHARED) && - (lookup_entry(nh, 0, -1, key, NODEV64, 0L) != NULL)) - return (0); - - return (1); -} - -static void -destroy_key(struct nm_next_hdr *nh, int shared, mdkey_t key) -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - - if ((key + 1) != rh->r_next_key) - return; - - /* - * Here the key is the highest allocated key. - * Check to see if this key is recycled or not and if yes, - * then keep r_next_key intact. - */ - if (shared & NM_KEY_RECYCLE) - return; - - while (unused_key(nh, shared, key)) - key--; - rh->r_next_key = key + 1; -} - -static void -cleanup_unused_rec(set_t setno, int devid_nm) -{ - mddb_recid_t recid; - mddb_type_t hdr, shr, notshr; - - hdr = ((devid_nm & NM_DEVID) ? MDDB_DID_NM_HDR : MDDB_NM_HDR); - notshr = ((devid_nm & NM_DEVID) ? MDDB_DID_NM : MDDB_NM); - shr = ((devid_nm & NM_DEVID) ? MDDB_DID_SHR_NM : MDDB_SHR_NM); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, hdr, 0)) > 0) - if (! (mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, notshr, 0)) > 0) - if (! (mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, shr, 0)) > 0) - if (! (mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); -} - -static int -create_hdr(set_t setno, int shared) -{ - struct nm_header_hdr *hhdr; - mddb_recid_t nmid; - - - if (shared & NM_DEVID) { - /* - * Deal with the device id name space - */ - nmid = md_set[setno].s_did_nmid = - mddb_createrec(sizeof (struct nm_header), - MDDB_DID_NM_HDR, 1, MD_CRO_32BIT, setno); - /* - * Out of space - */ - if (nmid < 0) - return (nmid); - } else { - nmid = md_set[setno].s_nmid = - mddb_createrec(sizeof (struct nm_header), - MDDB_NM_HDR, 1, MD_CRO_32BIT, setno); - /* - * Out of space - */ - if (nmid < 0) - return (nmid); - } - - hhdr = kmem_zalloc(sizeof (*hhdr), KM_SLEEP); - - if (shared & NM_DEVID) { - md_set[setno].s_did_nm = hhdr; - } else { - md_set[setno].s_nm = hhdr; - } - - hhdr->hh_header = (struct nm_header *)mddb_getrecaddr(nmid); - hhdr->hh_names.nmn_record = &(hhdr->hh_header->h_names); - hhdr->hh_shared.nmn_record = &(hhdr->hh_header->h_shared); - - /* - * h_names.r_next_key is set to zero in devid nmspace - * since we dont keep track of it - */ - if (shared & NM_DEVID) { - hhdr->hh_header->h_names.r_next_key = 0; - hhdr->hh_header->h_shared.r_next_key = 1; - } else { - hhdr->hh_header->h_names.r_next_key = 1; - hhdr->hh_header->h_shared.r_next_key = MDDB_FIRST_MODID; - } - - mddb_commitrec_wrapper(nmid); - return (0); -} - -static int -create_record( - mddb_recid_t p_recid, /* parent recid */ - struct nm_next_hdr *nh, /* parent record header */ - int shared, - size_t needed_space) -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *new_nh; - mddb_type_t rec_type; - size_t used_size; - size_t alloc_size; - mddb_recid_t recids[3]; - set_t setno; - mddb_recid_t new_id; - - setno = mddb_getsetnum(p_recid); - - if (shared & NM_DEVID) { - /* - * Device id name space - */ - rec_type = ((shared & NM_SHARED) ? - MDDB_DID_SHR_NM : MDDB_DID_NM); - used_size = ((shared & NM_SHARED) ? - (sizeof (struct devid_shr_rec) - - sizeof (struct did_shr_name)) : - (sizeof (struct devid_min_rec) - - sizeof (struct did_min_name))); - alloc_size = ((shared & NM_SHARED) ? - NM_DID_ALLOC_SIZE : NM_ALLOC_SIZE); - } else { - rec_type = ((shared & NM_SHARED) ? - MDDB_SHR_NM : MDDB_NM); - used_size = ((shared & NM_SHARED) ? - (sizeof (struct nm_shr_rec) - - sizeof (struct nm_shared_name)) : - (sizeof (struct nm_rec) - sizeof (struct nm_name))); - alloc_size = NM_ALLOC_SIZE; - } - - used_size += needed_space; - - new_id = mddb_createrec((size_t)alloc_size, rec_type, 1, - MD_CRO_32BIT, setno); - if (new_id < 0) - return (new_id); - - recids[0] = rh->r_next_recid = new_id; - recids[1] = p_recid; - recids[2] = 0; - - new_nh = (struct nm_next_hdr *)kmem_zalloc(sizeof (*new_nh), KM_SLEEP); - nh->nmn_nextp = new_nh; - new_nh->nmn_record = mddb_getrecaddr(rh->r_next_recid); - - ((struct nm_rec_hdr *)new_nh->nmn_record)->r_alloc_size = alloc_size; - ((struct nm_rec_hdr *)new_nh->nmn_record)->r_used_size = - (uint_t)used_size; - - mddb_commitrecs_wrapper(recids); - return (0); -} - -static int -expand_record( - struct nm_next_hdr *parent_nh, /* parent record header */ - mddb_recid_t parent_recid, /* parent record id */ - struct nm_next_hdr *nh, /* record hdr to be expanded */ - int shared) /* boolean - shared or not */ -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_rec_hdr *parent_rh = (struct nm_rec_hdr *) - parent_nh->nmn_record; - struct nm_rec_hdr *new_rh; - void *new_rec; - mddb_recid_t new_id; - mddb_recid_t old_id; - mddb_recid_t recids[3]; - set_t setno; - mddb_type_t rec_type; - size_t alloc_size; - - setno = mddb_getsetnum(parent_recid); - - if (shared & NM_DEVID) { - /* - * Device id name space - */ - rec_type = ((shared & NM_SHARED) ? - MDDB_DID_SHR_NM : MDDB_DID_NM); - alloc_size = ((shared & NM_SHARED) ? - NM_DID_ALLOC_SIZE : NM_ALLOC_SIZE); - } else { - rec_type = ((shared & NM_SHARED) ? MDDB_SHR_NM : MDDB_NM); - alloc_size = NM_ALLOC_SIZE; - } - - new_id = mddb_createrec((size_t)rh->r_alloc_size + alloc_size, rec_type, - 1, MD_CRO_32BIT, setno); - /* - * No space - */ - if (new_id < 0) - return (new_id); - - new_rec = mddb_getrecaddr(new_id); - (void) bcopy(rh, new_rec, rh->r_alloc_size); - - recids[0] = parent_recid; - recids[1] = new_id; - recids[2] = 0; - - /* Fix up rec hdr to point at this new record */ - nh->nmn_record = new_rec; - old_id = parent_rh->r_next_recid; - parent_rh->r_next_recid = new_id; - - if (shared & NM_DEVID) - /* - * Device id name space - */ - new_rh = ((shared & NM_SHARED) ? - &((struct devid_shr_rec *)new_rec)->did_rec_hdr : - &((struct devid_min_rec *)new_rec)->min_rec_hdr); - else - new_rh = ((shared & NM_SHARED) ? - &((struct nm_shr_rec *)new_rec)->sr_rec_hdr : - &((struct nm_rec *)new_rec)->r_rec_hdr); - - new_rh->r_alloc_size += alloc_size; - if (!(shared & NM_NOCOMMIT)) - mddb_commitrecs_wrapper(recids); - - /* delete the old record */ - mddb_deleterec_wrapper(old_id); - - return (0); -} - -struct nm_next_hdr * -get_first_record(set_t setno, int alloc, int shared) -{ - struct nm_next_hdr *nh; - mddb_recid_t nmid; - - ASSERT(md_get_setstatus(setno) & MD_SET_NM_LOADED); - - if (shared & NM_DEVID) { - /* - * We are dealing with the device id name space. - * If set is a MN diskset, just return 0 since - * devids aren't yet supported in MN disksets. - */ - if (MD_MNSET_SETNO(setno)) - return ((struct nm_next_hdr *)0); - if (md_set[setno].s_did_nm == NULL) - if (create_hdr(setno, shared) < 0) - return ((struct nm_next_hdr *)0); - - nh = ((shared & NM_SHARED) ? - &((struct nm_header_hdr *)md_set[setno].s_did_nm)->hh_shared - : - &((struct nm_header_hdr *) - md_set[setno].s_did_nm)->hh_names); - - nmid = md_set[setno].s_did_nmid; - } else { - /* - * We are dealing with the regular one (non-devid) - */ - if (md_set[setno].s_nm == NULL) - if (create_hdr(setno, shared) < 0) - return ((struct nm_next_hdr *)0); - - nh = ((shared & NM_SHARED) ? - &((struct nm_header_hdr *)md_set[setno].s_nm)->hh_shared - : - &((struct nm_header_hdr *)md_set[setno].s_nm)->hh_names); - - nmid = md_set[setno].s_nmid; - } - - /* - * Name space exists - */ - if (nh->nmn_nextp != NULL) - return (nh); - - /* - * If name space is expected and is empty - */ - if (! alloc) - return ((struct nm_next_hdr *)0); - - /* - * Empty is okay alloc it - */ - if (create_record(nmid, nh, shared, 0L) < 0) - return ((struct nm_next_hdr *)0); - - return (nh); -} - - -void * -alloc_entry( - struct nm_next_hdr *nh, /* parent name header */ - mddb_recid_t recid, /* parent record id */ - size_t len, /* length of entry */ - int shared, /* shared boolean */ - mddb_recid_t *id) /* return of new record id */ -{ - struct nm_rec_hdr *rh; /* parent */ - mddb_recid_t this_recid; - struct nm_next_hdr *this_nh; - struct nm_rec_hdr *this_rh; - void *this_rec; - size_t needed_space; - char *name; - - if (shared & NM_DEVID) - /* - * Device id name space - */ - needed_space = ((shared & NM_SHARED) ? - sizeof (struct did_shr_name) : - sizeof (struct did_min_name)) + len - 1; - else - needed_space = ((shared & NM_SHARED) ? - sizeof (struct nm_shared_name) : - sizeof (struct nm_name)) + len - 1; - - needed_space = roundup(needed_space, sizeof (uint_t)); - - /* check the next record to see if it has space */ - /*CONSTCOND*/ - while (1) { - while ((this_nh = nh->nmn_nextp) != NULL) { - - rh = (struct nm_rec_hdr *)nh->nmn_record; - this_recid = rh->r_next_recid; - this_rec = this_nh->nmn_record; - - if (shared & NM_DEVID) - this_rh = ((shared & NM_SHARED) ? - &((struct devid_shr_rec *) - this_rec)->did_rec_hdr : - &((struct devid_min_rec *) - this_rec)->min_rec_hdr); - else - this_rh = ((shared & NM_SHARED) ? - &((struct nm_shr_rec *) - this_rec)->sr_rec_hdr : - &((struct nm_rec *)this_rec)->r_rec_hdr); - - /* check for space in this record */ - if ((this_rh->r_alloc_size - this_rh->r_used_size) >= - needed_space) { - /* allocate space in this record */ - name = (char *)this_rec + this_rh->r_used_size; - this_rh->r_used_size += (uint_t)needed_space; - if (!(shared & NM_NOCOMMIT)) - mddb_commitrec_wrapper(this_recid); - *id = this_recid; - return ((caddr_t)name); - } - - /* if we can expand the record we look again */ - if (expand_record(nh, recid, this_nh, shared) == 0) - continue; - - /* advance parent to this record, and go try next */ - recid = this_recid; - nh = this_nh; - } - - /* no space, try creating a new record after parent */ - if (create_record(recid, nh, shared, 0L) < 0) - return ((caddr_t)0); - } /* go check the new record */ - /* can't get here, but lint seems to think so */ - /* NOTREACHED */ -} - -static void * -get_next_entry( - struct nm_next_hdr *nh, - caddr_t ent, - size_t ent_size, - size_t *off) -{ - - if (((struct nm_rec_hdr *)nh->nmn_record)->r_used_size <= - (*off + ent_size)) { - if (nh->nmn_nextp == NULL) - return ((caddr_t)0); - - /* offset == 0, means go to next record */ - *off = 0; - return ((caddr_t)0); - } - - *off += ent_size; - return ((caddr_t)((char *)ent + ent_size)); -} - -static int -rem_entry( - struct nm_next_hdr *nh, /* record header for entry being removed */ - mddb_recid_t id, /* record id for entry being removed */ - void *ent, /* address of entry to be removed */ - size_t ent_size, /* size of entry to be removed */ - size_t offset, /* offset of entry within record */ - int devid_nm) /* bitwise of NM_DEVID, NM_KEY_RECYCLE */ -{ - struct nm_next_hdr *first_nh; - mddb_recid_t recids[3]; - size_t c = ((struct nm_rec_hdr *) - nh->nmn_record)->r_used_size - offset - - ent_size; - set_t setno; - mdkey_t ent_key; - - - setno = mddb_getsetnum(id); - first_nh = get_first_record(setno, 0, devid_nm | NM_NOTSHARED); - ASSERT(first_nh != NULL); - - recids[0] = id; - recids[1] = ((devid_nm & NM_DEVID) ? md_set[setno].s_did_nmid : - md_set[setno].s_nmid); - recids[2] = 0; - ent_key = ((devid_nm & NM_DEVID) ? - ((struct did_min_name *)ent)->min_key : - ((struct nm_name *)ent)->n_key); - - if (c == 0) - (void) bzero(ent, ent_size); /* last entry */ - else { - (void) ovbcopy((caddr_t)ent+ent_size, ent, c); - (void) bzero((caddr_t)ent+c, ent_size); - } - - ((struct nm_rec_hdr *)nh->nmn_record)->r_used_size -= (uint_t)ent_size; - - /* - * We don't keep track of keys in the device id nonshared namespace - */ - if (!(devid_nm & NM_DEVID)) - destroy_key(first_nh, devid_nm | NM_NOTSHARED, ent_key); - - mddb_commitrecs_wrapper(recids); - return (0); -} - -static int -rem_shr_entry( - struct nm_next_hdr *nh, /* record header for entry being removed */ - mddb_recid_t id, /* record id for entry being removed */ - void *ent, /* address of entry to be removed */ - size_t ent_size, /* size of entry to be removed */ - size_t offset, /* offset of entry within record */ - int devid_nm) /* bitwise of NM_DEVID, NM_IMP_SHARED, */ - /* NM_NOCOMMIT, NM_KEY_RECYCLE */ -{ - struct nm_next_hdr *first_nh; - mddb_recid_t recids[3]; - size_t c = ((struct nm_rec_hdr *) - nh->nmn_record)->r_used_size - offset - - ent_size; - set_t setno; - uint_t count; - - setno = mddb_getsetnum(id); - first_nh = get_first_record(setno, 0, devid_nm | NM_SHARED); - ASSERT(first_nh != NULL); - - recids[0] = id; - recids[1] = ((devid_nm & NM_DEVID) ? md_set[setno].s_did_nmid : - md_set[setno].s_nmid); - recids[2] = 0; - - if (devid_nm & NM_DEVID) { - count = --((struct did_shr_name *)ent)->did_count; - } else { - count = --((struct nm_shared_name *)ent)->sn_count; - } - - if (count == 0 || devid_nm & NM_IMP_SHARED) { - mdkey_t ent_key; - - ent_key = ((devid_nm & NM_DEVID) ? - ((struct did_shr_name *)ent)->did_key : - ((struct nm_shared_name *)ent)->sn_key); - - if (c == 0) - (void) bzero(ent, ent_size); /* last entry */ - else { - (void) ovbcopy((caddr_t)ent+ent_size, ent, c); - (void) bzero((caddr_t)ent+c, ent_size); - } - - ((struct nm_rec_hdr *)nh->nmn_record)->r_used_size -= - (uint_t)ent_size; - destroy_key(first_nh, devid_nm | NM_SHARED, ent_key); - } - - if (!(devid_nm & NM_NOCOMMIT)) - mddb_commitrecs_wrapper(recids); - return (0); -} - -static mdkey_t -setshared_name(set_t setno, char *shrname, mdkey_t shrkey, int devid_nm) -{ - struct nm_next_hdr *nh; - struct nm_shared_name *shn; - struct did_shr_name *did_shn = (struct did_shr_name *)NULL; - mddb_recid_t recid; - mddb_recid_t recids[3]; - size_t len; - mdkey_t key; - int shared = NM_SHARED; - - - if (shrkey == MD_KEYWILD) { - len = ((devid_nm & NM_DEVID) ? - ddi_devid_sizeof((ddi_devid_t)shrname) : - (strlen(shrname) + 1)); - } - /* - * If devid_nm is not NULL, nh will point to the did name space - */ - if (devid_nm & NM_NOCOMMIT) { - if ((nh = get_first_record(setno, 0, devid_nm | NM_SHARED)) - == NULL) - return (MD_KEYBAD); - } else { - if ((nh = get_first_record(setno, 1, devid_nm | NM_SHARED)) - == NULL) - return (MD_KEYBAD); - } - if (devid_nm & NM_NOCOMMIT) - shared = NM_NOCOMMIT | shared; - if (devid_nm & NM_DEVID) { - /* - * A key has been supplied so find the corresponding entry - * which must exist. - */ - if (shrkey != MD_KEYWILD) { - did_shn = (struct did_shr_name *)lookup_shared_entry(nh, - shrkey, NULL, &recid, devid_nm); - if (did_shn == (struct did_shr_name *)NULL) - return (MD_KEYBAD); - } else { - did_shn = (struct did_shr_name *)lookup_shared_entry(nh, - 0, shrname, &recid, devid_nm); - } - if (did_shn != (struct did_shr_name *)NULL) { - did_shn->did_count++; - if (!(devid_nm & NM_NOCOMMIT)) - mddb_commitrec_wrapper(recid); - return (did_shn->did_key); - } - - - /* allocate an entry and fill it in */ - if ((did_shn = (struct did_shr_name *)alloc_entry(nh, - md_set[setno].s_did_nmid, len, shared | NM_DEVID, - &recid)) == NULL) - return (MD_KEYBAD); - did_shn->did_key = create_key(nh); - did_shn->did_count = 1; - did_shn->did_size = (ushort_t)len; - /* - * Let the whole world know it is valid devid - */ - did_shn->did_data = NM_DEVID_VALID; - bcopy((void *)shrname, (void *)did_shn->did_devid, len); - key = did_shn->did_key; - } else { - if ((shn = (struct nm_shared_name *)lookup_shared_entry(nh, - 0, shrname, &recid, 0L)) != NULL) { - /* Increment reference count */ - shn->sn_count++; - if (!(devid_nm & NM_NOCOMMIT)) - mddb_commitrec_wrapper(recid); - return (shn->sn_key); - } - - /* allocate an entry and fill it in */ - if ((shn = (struct nm_shared_name *)alloc_entry(nh, - md_set[setno].s_nmid, len, shared, &recid)) == NULL) - return (MD_KEYBAD); - shn->sn_key = create_key(nh); - shn->sn_count = 1; - shn->sn_namlen = (ushort_t)len; - (void) strcpy(shn->sn_name, shrname); - key = shn->sn_key; - } - - recids[0] = recid; - recids[1] = ((devid_nm & NM_DEVID) ? md_set[setno].s_did_nmid : - md_set[setno].s_nmid); - recids[2] = 0; - - if (!(devid_nm & NM_NOCOMMIT)) - mddb_commitrecs_wrapper(recids); - return (key); -} - -void * -getshared_name(set_t setno, mdkey_t shrkey, int devid_nm) -{ - char *shn; - struct nm_next_hdr *nh; - mddb_recid_t recid; - - if ((nh = get_first_record(setno, 0, devid_nm | NM_SHARED)) == NULL) - return ((void *)0); - - shn = (char *)((devid_nm & NM_DEVID) ? - lookup_shared_entry(nh, shrkey, (char *)0, &recid, devid_nm) : - lookup_shared_entry(nh, shrkey, (char *)0, &recid, 0L)); - - if (shn == NULL) - return ((void *)0); - - return ((void *)((devid_nm & NM_DEVID) ? - ((struct did_shr_name *)shn)->did_devid : - ((struct nm_shared_name *)shn)->sn_name)); -} - -static mdkey_t -getshared_key(set_t setno, char *shrname, int devid_nm) -{ - struct nm_next_hdr *nh; - char *shn; - mddb_recid_t recid; - - if ((nh = get_first_record(setno, 1, devid_nm | NM_SHARED)) == NULL) - return (MD_KEYBAD); - - shn = (char *)lookup_shared_entry(nh, 0, shrname, &recid, devid_nm); - - if (shn == NULL) - return (MD_KEYBAD); - - return (((devid_nm & NM_DEVID) ? - ((struct did_shr_name *)shn)->did_key : - ((struct nm_shared_name *)shn)->sn_key)); -} - -static int -setshared_data(set_t setno, mdkey_t shrkey, caddr_t data) -{ - struct nm_shared_name *shn; - struct nm_next_hdr *nh; - mddb_recid_t recid; - - if ((nh = get_first_record(setno, 0, NM_SHARED)) == NULL) - return (ENOENT); - - shn = (struct nm_shared_name *)lookup_shared_entry(nh, shrkey, - (char *)0, &recid, 0L); - if (shn == NULL) - return (ENOENT); - shn->sn_data = (uint32_t)(uintptr_t)data; - return (0); -} - -int -update_entry( - struct nm_next_hdr *nh, /* head record header */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) via md_setdevname */ - int devid_nm) /* Which name space? */ -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *this_nh = nh->nmn_nextp; - void *record = this_nh->nmn_record; - mddb_recid_t recid = rh->r_next_recid; - struct nm_rec_hdr *this_rh; - caddr_t n; - size_t offset, n_offset, n_size; - mdkey_t n_key; - side_t n_side; - - n_offset = offset = ((devid_nm & NM_DEVID) ? - (sizeof (struct devid_min_rec) - sizeof (struct did_min_name)) - : - (sizeof (struct nm_rec) - sizeof (struct nm_name))); - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr : - &((struct nm_rec *)record)->r_rec_hdr); - - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *)record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *)record)->r_name[0])); - - /*CONSTCOND*/ - while (1) { - - if (devid_nm & NM_DEVID) { - n_side = ((struct did_min_name *)n)->min_side; - n_key = ((struct did_min_name *)n)->min_key; - n_size = DID_NAMSIZ((struct did_min_name *)n); - - } else { - n_side = ((struct nm_name *)n)->n_side; - n_key = ((struct nm_name *)n)->n_key; - n_size = NAMSIZ((struct nm_name *)n); - } - - if ((side == n_side) && (key == n_key)) { - mddb_commitrec_wrapper(recid); - return (0); - } - - n = (caddr_t)get_next_entry(this_nh, n, n_size, &offset); - - if (n == NULL) { - if (offset) - return (ENOENT); - - /* Go to next record */ - offset = n_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - recid = this_rh->r_next_recid; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr - : - &((struct nm_rec *)record)->r_rec_hdr); - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *) - record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *) - record)->r_name[0])); - } - } - /*NOTREACHED*/ -} - -int -remove_entry( - struct nm_next_hdr *nh, /* head record header */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) via md_setdevname */ - int devid_nm) /* bitwise of NM_DEVID, NM_KEY_RECYCLE */ -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *this_nh = nh->nmn_nextp; - void *record = this_nh->nmn_record; - mddb_recid_t recid = rh->r_next_recid; - struct nm_rec_hdr *this_rh; - caddr_t n; - size_t offset, n_offset, n_size; - mdkey_t n_key; - side_t n_side; - - n_offset = offset = ((devid_nm & NM_DEVID) ? - (sizeof (struct devid_min_rec) - sizeof (struct did_min_name)) - : - (sizeof (struct nm_rec) - sizeof (struct nm_name))); - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr : - &((struct nm_rec *)record)->r_rec_hdr); - - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *)record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *)record)->r_name[0])); - - /*CONSTCOND*/ - while (1) { - - if (devid_nm & NM_DEVID) { - n_side = ((struct did_min_name *)n)->min_side; - n_key = ((struct did_min_name *)n)->min_key; - n_size = DID_NAMSIZ((struct did_min_name *)n); - } else { - n_side = ((struct nm_name *)n)->n_side; - n_key = ((struct nm_name *)n)->n_key; - n_size = NAMSIZ((struct nm_name *)n); - } - - if ((side == n_side) && (key == n_key)) - return (rem_entry(this_nh, recid, (char *)n, n_size, - offset, devid_nm)); - - n = (caddr_t)get_next_entry(this_nh, n, n_size, &offset); - - if (n == NULL) { - if (offset) - return (ENOENT); - - /* Go to next record */ - offset = n_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - recid = this_rh->r_next_recid; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr - : - &((struct nm_rec *)record)->r_rec_hdr); - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *) - record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *) - record)->r_name[0])); - } - } - /*NOTREACHED*/ -} - -int -remove_shared_entry( - struct nm_next_hdr *nh, /* first record header to start lookup */ - mdkey_t key, /* shared key, used as key if nm is NULL */ - char *nm, /* shared name, used as key if non-NULL */ - int devid_nm) /* bitwise of NM_DEVID, NM_IMP_SHARED, */ - /* NM_NOCOMMIT, NM_KEY_RECYCLE */ -{ - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *this_nh = nh->nmn_nextp; - void *record = this_nh->nmn_record; - struct nm_rec_hdr *this_rh; - caddr_t shn; - mddb_recid_t recid = rh->r_next_recid; - size_t offset, shn_offset; - size_t nm_len = 0, shn_size; - mdkey_t shn_key; - ushort_t shn_namlen; - - if (nm == (char *)0) { - /* No name. Search by key only. */ - if (key == MD_KEYBAD) { - /* No key either. Nothing to remove. */ - return (0); - } - } else { - /* How long is the name? */ - nm_len = ((devid_nm & NM_DEVID) ? - ddi_devid_sizeof((ddi_devid_t)nm) : - (strlen(nm) + 1)); - } - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_shr_rec *)record)->did_rec_hdr : - &((struct nm_shr_rec *)record)->sr_rec_hdr); - - shn_offset = offset = ((devid_nm & NM_DEVID) ? - (sizeof (struct devid_shr_rec) - sizeof (struct did_shr_name)) - : - (sizeof (struct nm_shr_rec) - sizeof (struct nm_shared_name))); - - shn = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_shr_rec *)record)->device_id[0]) : - ((caddr_t)&((struct nm_shr_rec *)record)->sr_name[0])); - - /*CONSTCOND*/ - while (1) { - - if (devid_nm & NM_DEVID) { - shn_key = ((struct did_shr_name *)shn)->did_key; - shn_namlen = ((struct did_shr_name *)shn)->did_size; - shn_size = DID_SHR_NAMSIZ((struct did_shr_name *)shn); - } else { - shn_key = ((struct nm_shared_name *)shn)->sn_key; - shn_namlen = ((struct nm_shared_name *)shn)->sn_namlen; - shn_size = SHR_NAMSIZ((struct nm_shared_name *)shn); - } - - if ((key != 0) && (key == shn_key)) - return (rem_shr_entry(this_nh, recid, (char *)shn, - shn_size, offset, devid_nm)); - - if (nm_len == shn_namlen) { - if (!(devid_nm & NM_DEVID)) { - if (strcmp(nm, ((struct nm_shared_name *) - shn)->sn_name) == 0) - return (rem_shr_entry(this_nh, recid, - (char *)shn, shn_size, offset, - devid_nm)); - } else { - - if (nm == NULL || - ((struct did_shr_name *)shn)->did_devid - == NULL) { - return (0); - } - if (ddi_devid_compare((ddi_devid_t)nm, - (ddi_devid_t)(((struct did_shr_name *)shn)-> - did_devid)) == 0) - return (rem_shr_entry(this_nh, recid, - (char *)shn, shn_size, offset, - devid_nm)); - } - } - - shn = (caddr_t)get_next_entry(this_nh, - (caddr_t)shn, shn_size, &offset); - - if (shn == (caddr_t)0) { - if (offset) - return (ENOENT); - - /* Go to next record */ - offset = shn_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - recid = this_rh->r_next_recid; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_shr_rec *)record)->did_rec_hdr : - &((struct nm_shr_rec *)record)->sr_rec_hdr); - shn = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_shr_rec *) - record)->device_id[0]) : - ((caddr_t)&((struct nm_shr_rec *) - record)->sr_name[0])); - } - } - /*NOTREACHED*/ -} - -static md_dev64_t -build_device_number(set_t setno, struct nm_name *n) -{ - major_t maj; - char *shn; - md_dev64_t dev; - - /* - * Can't determine the driver name - */ - if ((shn = (char *)getshared_name(setno, n->n_drv_key, 0L)) == NULL) - return (NODEV64); - - if (MD_UPGRADE) - maj = md_targ_name_to_major(shn); - else - maj = ddi_name_to_major(shn); - - if (maj == (major_t)-1) - return (NODEV64); - dev = md_makedevice(maj, n->n_minor); - - return (dev); -} - -void * -lookup_entry( - struct nm_next_hdr *nh, /* head record header */ - set_t setno, /* set to lookup in */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) from md_setdevname */ - md_dev64_t dev, /* (alt. key 2) use if key == KEYWILD */ - int devid_nm /* Which name space? */ -) -{ - struct nm_next_hdr *this_nh = nh->nmn_nextp; - void *record; - struct nm_rec_hdr *this_rh; - caddr_t n; - size_t offset, n_offset, n_size; - side_t n_side; - mdkey_t n_key; - - if ((key == MD_KEYWILD) && (dev == NODEV64)) - return ((void *)0); - - if (this_nh == NULL) - return ((void *)0); - - record = this_nh->nmn_record; - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr : - &((struct nm_rec *)record)->r_rec_hdr); - - /* code to see if EMPTY record */ - while (this_nh && this_rh->r_used_size == sizeof (struct nm_rec_hdr)) { - /* Go to next record */ - this_nh = this_nh->nmn_nextp; - if (this_nh == NULL) - return ((void *)0); - record = this_nh->nmn_record; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr : - &((struct nm_rec *)record)->r_rec_hdr); - } - - /* - * n_offset will be used to reset offset - */ - n_offset = offset = ((devid_nm & NM_DEVID) ? - (sizeof (struct devid_min_rec) - sizeof (struct did_min_name)) : - (sizeof (struct nm_rec) - sizeof (struct nm_name))); - - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *)record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *)record)->r_name[0])); - - /*CONSTCOND*/ - while (1) { - - if (devid_nm & NM_DEVID) { - n_side = ((struct did_min_name *)n)->min_side; - n_key = ((struct did_min_name *)n)->min_key; - n_size = DID_NAMSIZ((struct did_min_name *)n); - } else { - n_side = ((struct nm_name *)n)->n_side; - n_key = ((struct nm_name *)n)->n_key; - n_size = NAMSIZ((struct nm_name *)n); - } - - if ((side == n_side) || (side == MD_SIDEWILD)) { - - if ((key != MD_KEYWILD) && (key == n_key)) - return ((void *)n); - - if ((key == MD_KEYWILD) && !devid_nm && - (dev == build_device_number(setno, - (struct nm_name *)n))) - return ((void *)n); - - } - - n = (caddr_t)get_next_entry(this_nh, n, n_size, &offset); - - if (n == NULL) { - /* - * No next record, return NULL - */ - if (this_nh->nmn_nextp == NULL) - return (NULL); - - /* Go to next record */ - offset = n_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_min_rec *)record)->min_rec_hdr : - &((struct nm_rec *)record)->r_rec_hdr); - n = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_min_rec *) - record)->minor_name[0]) : - ((caddr_t)&((struct nm_rec *) - record)->r_name[0])); - } - } - /*NOTREACHED*/ -} - -static int -is_meta_drive(set_t setno, mdkey_t key) -{ - int i; - struct nm_next_hdr *nh; - struct nm_shared_name *shn; - - if ((nh = get_first_record(setno, 0, NM_SHARED)) == NULL) - return (FALSE); - if ((shn = (struct nm_shared_name *)lookup_shared_entry(nh, - key, NULL, NULL, NM_SHARED)) == NULL) { - return (FALSE); - } - - /* See if the name is a metadevice. */ - for (i = 0; i < META_NAME_COUNT; i++) { - if (strcmp(meta_names[i], shn->sn_name) == 0) - return (TRUE); - } - return (FALSE); -} - -static lookup_dev_result_t -lookup_deventry( - struct nm_next_hdr *nh, /* head record header */ - set_t setno, /* set to lookup in */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) from md_setdevname */ - char *drvnm, /* drvnm to be stored */ - minor_t mnum, /* minor number to be stored */ - char *dirnm, /* directory name to be stored */ - char *filenm, /* device filename to be stored */ - struct nm_name **ret_rec /* place return found rec. */ -) -{ - struct nm_next_hdr *this_nh = nh->nmn_nextp; - struct nm_rec *record; - struct nm_rec_hdr *this_rh; - struct nm_name *n; - size_t offset; - mdkey_t dirkey, drvkey; - - *ret_rec = NULL; - if (this_nh == NULL) - return (LOOKUP_DEV_NOMATCH); - - record = (struct nm_rec *)this_nh->nmn_record; - this_rh = &record->r_rec_hdr; - n = &record->r_name[0]; - - offset = sizeof (struct nm_rec) - sizeof (struct nm_name); - - if ((drvkey = getshared_key(setno, drvnm, 0L)) == MD_KEYBAD) - return (LOOKUP_DEV_NOMATCH); - - if (dirnm == NULL) { - /* No directory name to look up. */ - dirkey = MD_KEYBAD; - } else { - /* Look up the directory name */ - if ((dirkey = getshared_key(setno, dirnm, 0L)) == MD_KEYBAD) - return (LOOKUP_DEV_NOMATCH); - } - ASSERT(side != MD_SIDEWILD); - - /* code to see if EMPTY record */ - while (this_nh && this_rh->r_used_size == sizeof (struct nm_rec_hdr)) { - /* Go to next record */ - this_nh = this_nh->nmn_nextp; - if (this_nh == NULL) - return (LOOKUP_DEV_NOMATCH); - record = (struct nm_rec *)this_nh->nmn_record; - this_rh = &record->r_rec_hdr; - n = &record->r_name[0]; - } - - /*CONSTCOND*/ - while (1) { - if ((side == n->n_side) && - ((key == MD_KEYWILD) || (key == n->n_key)) && - (mnum == n->n_minor) && - (drvkey == n->n_drv_key) && - (dirkey == n->n_dir_key) && - (strcmp(filenm, n->n_name) == 0)) { - *ret_rec = n; - return (LOOKUP_DEV_FOUND); - } - - /* - * Now check for a name conflict. If the filenm of the - * current record matches filename passed in we have a - * potential conflict. If all the other parameters match - * except for the side number, then this is not a - * conflict. The reason is that there are cases where name - * record is added to each side of a set. - * - * There is one additional complication. It is only a - * conflict if the drvkeys both represent metadevices. It - * is legal for a metadevice and a physical device to have - * the same name. - */ - if (strcmp(filenm, n->n_name) == 0) { - int both_meta; - - /* - * It is hsp and we are trying to add it twice - */ - if (strcmp(getshared_name(setno, n->n_drv_key, 0L), - MD_HOTSPARES) == 0 && (side == n->n_side) && - find_hot_spare_pool(setno, - KEY_TO_HSP_ID(setno, n->n_key)) == NULL) { - /* - * All entries removed - */ - rw_exit(&nm_lock.lock); - (void) md_rem_hspname(setno, n->n_key); - rw_enter(&nm_lock.lock, RW_WRITER); - return (LOOKUP_DEV_NOMATCH); - } - - /* - * It is metadevice and we are trying to add it twice - */ - if (md_set[setno].s_un[MD_MIN2UNIT(n->n_minor)] - == NULL && (side == n->n_side) && - ddi_name_to_major(getshared_name(setno, - n->n_drv_key, 0L)) == md_major) { - /* - * Apparently it is invalid so - * clean it up - */ - md_remove_minor_node(n->n_minor); - rw_exit(&nm_lock.lock); - (void) md_rem_selfname(n->n_minor); - rw_enter(&nm_lock.lock, RW_WRITER); - return (LOOKUP_DEV_NOMATCH); - } - - /* First see if the two drives are metadevices. */ - if (is_meta_drive(setno, drvkey) && - is_meta_drive(setno, n->n_drv_key)) { - both_meta = TRUE; - } else { - both_meta = FALSE; - } - /* Check rest of the parameters. */ - if ((both_meta == TRUE) && - ((key != n->n_key) || - (mnum != n->n_minor) || - (drvkey != n->n_drv_key) || - (dirkey != n->n_dir_key))) { - return (LOOKUP_DEV_CONFLICT); - } - } - n = (struct nm_name *)get_next_entry(this_nh, (caddr_t)n, - NAMSIZ(n), &offset); - - if (n == (struct nm_name *)0) { - if (offset) - return (LOOKUP_DEV_NOMATCH); - - /* Go to next record */ - offset = sizeof (struct nm_rec) - - sizeof (struct nm_name); - this_nh = this_nh->nmn_nextp; - record = (struct nm_rec *)this_nh->nmn_record; - this_rh = &record->r_rec_hdr; - n = &record->r_name[0]; - } - } - /*NOTREACHED*/ -} - -void * -lookup_shared_entry( - struct nm_next_hdr *nh, /* First record header to start lookup */ - mdkey_t key, /* Shared key, used as key if nm is NULL */ - char *nm, /* Shared name, used as key if non-NULL */ - mddb_recid_t *id, /* mddb record id of record entry is found in */ - int devid_nm /* which name space? */ -) -{ - - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *this_nh = nh->nmn_nextp; - void *record; - struct nm_rec_hdr *this_rh; - caddr_t shn; - size_t offset, shn_offset; - size_t nm_len = 0, shn_size; - mdkey_t shn_key; - ushort_t shn_namlen; - - if (this_nh == NULL) - return ((void *) 0); - - record = this_nh->nmn_record; - - if (nm != (char *)0) - nm_len = ((devid_nm & NM_DEVID) ? - ddi_devid_sizeof((ddi_devid_t)nm) : - (strlen(nm) + 1)); - - if (id != NULL) - *id = rh->r_next_recid; - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_shr_rec *)record)->did_rec_hdr : - &((struct nm_shr_rec *)record)->sr_rec_hdr); - - /* code to see if EMPTY record */ - while (this_nh && this_rh->r_used_size == sizeof (struct nm_rec_hdr)) { - /* Go to next record */ - this_nh = this_nh->nmn_nextp; - if (this_nh == NULL) - return ((void *)0); - record = this_nh->nmn_record; - if (id != NULL) - *id = this_rh->r_next_recid; - - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_shr_rec *)record)->did_rec_hdr : - &((struct nm_shr_rec *)record)->sr_rec_hdr); - } - - /* - * shn_offset will be used to reset offset - */ - shn_offset = offset = ((devid_nm & NM_DEVID) ? - (sizeof (struct devid_shr_rec) - sizeof (struct did_shr_name)) : - (sizeof (struct nm_shr_rec) - sizeof (struct nm_shared_name))); - - shn = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_shr_rec *)record)->device_id[0]) : - ((caddr_t)&((struct nm_shr_rec *)record)->sr_name[0])); - - /*CONSTCOND*/ - while (1) { - - if (devid_nm & NM_DEVID) { - shn_key = ((struct did_shr_name *)shn)->did_key; - shn_namlen = ((struct did_shr_name *)shn)->did_size; - shn_size = DID_SHR_NAMSIZ((struct did_shr_name *)shn); - } else { - shn_key = ((struct nm_shared_name *)shn)->sn_key; - shn_namlen = ((struct nm_shared_name *)shn)->sn_namlen; - shn_size = SHR_NAMSIZ((struct nm_shared_name *)shn); - } - - if ((key != 0) && (key == shn_key)) - return ((void *)shn); - - /* Lookup by name */ - if (nm != NULL) { - if (devid_nm & NM_IMP_SHARED) { - /* - * the nm passed in is "/dev/md" in the import case - * and we want to do a partial match on that. - */ - if (strncmp(nm, ((struct nm_shared_name *)shn)->sn_name, - strlen(nm)) == 0) - return ((void *)shn); - } else if (nm_len == shn_namlen) { - if (devid_nm & NM_DEVID) { - if (ddi_devid_compare((ddi_devid_t)nm, - (ddi_devid_t)(((struct did_shr_name *)shn)-> - did_devid)) == 0) - return ((void *)shn); - } else { - if (strcmp(nm, ((struct nm_shared_name *) - shn)->sn_name) == 0) - return ((void *)shn); - } - } - } - - shn = (caddr_t)get_next_entry(this_nh, - (caddr_t)shn, shn_size, &offset); - - if (shn == (caddr_t)0) { - /* - * No next record, return - */ - if (this_nh->nmn_nextp == NULL) - return (NULL); - - /* Go to next record */ - offset = shn_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - if (id != NULL) - *id = this_rh->r_next_recid; - this_rh = ((devid_nm & NM_DEVID) ? - &((struct devid_shr_rec *)record)->did_rec_hdr : - &((struct nm_shr_rec *)record)->sr_rec_hdr); - shn = ((devid_nm & NM_DEVID) ? - ((caddr_t)&((struct devid_shr_rec *) - record)->device_id[0]) : - ((caddr_t)&((struct nm_shr_rec *) - record)->sr_name[0])); - } - } - /*NOTREACHED*/ -} - - -/* - * lookup_hspentry - Getting a hotspare pool entry from the namespace. - * Use either the NM key or the hotspare name to find - * a matching record in the namespace of the set. - */ -void * -lookup_hspentry( - struct nm_next_hdr *nh, /* head record header */ - set_t setno, /* set to lookup in */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) from md_setdevname */ - char *name /* (alt. key 2), if key == MD_KEYWILD */ -) -{ - struct nm_next_hdr *this_nh = nh->nmn_nextp; - struct nm_rec *record; - struct nm_rec_hdr *this_rh; - struct nm_name *n; - size_t offset, n_offset, n_size; - side_t n_side; - mdkey_t n_key; - char *drv_name; - char *tmpname; - char *setname = NULL; - - if ((key == MD_KEYWILD) && (name == '\0')) - return ((void *)0); - - if (this_nh == NULL) - return ((void *)0); - - record = (struct nm_rec *)this_nh->nmn_record; - - this_rh = &record->r_rec_hdr; - - if (setno != MD_LOCAL_SET) { - setname = mddb_getsetname(setno); - if (setname == NULL) - return ((void *)0); - } - - /* code to see if EMPTY record */ - while (this_nh && this_rh->r_used_size == sizeof (struct nm_rec_hdr)) { - /* Go to next record */ - this_nh = this_nh->nmn_nextp; - if (this_nh == NULL) - return ((void *)0); - record = this_nh->nmn_record; - this_rh = &record->r_rec_hdr; - } - - /* - * n_offset will be used to reset offset - */ - n_offset = offset = (sizeof (struct nm_rec) - sizeof (struct nm_name)); - - n = ((struct nm_name *)&record->r_name[0]); - - tmpname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - - /*CONSTCOND*/ - while (1) { - n_side = n->n_side; - n_size = NAMSIZ(n); - if ((drv_name = (char *)getshared_name(setno, - n->n_drv_key, 0L)) != NULL) { - - /* We're only interested in hsp NM records */ - if ((strcmp(drv_name, "md_hotspares") == 0) && - ((side == n_side) || (side == MD_SIDEWILD))) { - n_key = n->n_key; - - if ((key != MD_KEYWILD) && (key == n_key)) - goto done; - - /* - * Searching by a hotspare pool name. - * Since the input name is of the form - * setname/hsp_name, we need to attach - * the string 'setname/' in front of the - * n->n_name. - */ - if (key == MD_KEYWILD) { - if (setname != NULL) - (void) snprintf(tmpname, - MAXPATHLEN, "%s/%s", - setname, - ((struct nm_name *) - n)->n_name); - else - (void) snprintf(tmpname, - MAXPATHLEN, "%s", - ((struct nm_name *) - n)->n_name); - - if ((strcmp(name, tmpname)) == 0) - goto done; - } - } - } - - n = (struct nm_name *)get_next_entry(this_nh, (caddr_t)n, - n_size, &offset); - - if (n == NULL) { - /* - * No next record, return - */ - if (offset) - goto done; - - /* Go to next record */ - offset = n_offset; - this_nh = this_nh->nmn_nextp; - record = (struct nm_rec *)this_nh->nmn_record; - this_rh = &record->r_rec_hdr; - n = ((struct nm_name *)&record->r_name[0]); - } - } - -done: - kmem_free(tmpname, MAXPATHLEN); - return ((void *)n); -} - -static int -md_make_devname(struct nm_name *n, set_t setno, char *string, size_t max_size) -{ - - char *dir_name; - size_t dir_len; - - /* - * Can't determine the path - */ - if ((dir_name = - (char *)getshared_name(setno, n->n_dir_key, 0L)) == NULL) - return ((int)NODEV64); - - dir_len = strlen(dir_name); - if ((dir_len + n->n_namlen) > max_size) - return (EFAULT); - - /* Tack the directory and device strings together */ - (void) strcpy(strcpy(string, dir_name) + dir_len, n->n_name); - return (0); -} - -static void -build_rec_hdr_list(struct nm_next_hdr *nh, mddb_recid_t recid, int shared) -{ - size_t overhead_size; - struct nm_rec_hdr *this_rh; - uint_t private; - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - struct nm_next_hdr *this_nh; - set_t setno; - int multi_node = 0; - - /* If given record is for a multi_node set, set flag */ - setno = DBSET(recid); - if (MD_MNSET_SETNO(setno)) - multi_node = 1; - - if (shared & NM_DEVID) - overhead_size = ((shared & NM_SHARED) ? - (sizeof (struct devid_shr_rec) - - sizeof (struct did_shr_name)) - : - (sizeof (struct devid_min_rec) - - sizeof (struct did_min_name))); - else - overhead_size = ((shared & NM_SHARED) ? - (sizeof (struct nm_shr_rec) - - sizeof (struct nm_shared_name)) : - (sizeof (struct nm_rec) - sizeof (struct nm_name))); - - while (rh->r_next_recid > 0) { - this_nh = kmem_zalloc(sizeof (*this_nh), KM_SLEEP); - nh->nmn_nextp = this_nh; - this_nh->nmn_record = mddb_getrecaddr(rh->r_next_recid); - - ASSERT(this_nh->nmn_record != NULL); - - if (shared & NM_DEVID) - this_rh = ((shared & NM_SHARED) ? - &((struct devid_shr_rec *) - this_nh->nmn_record)->did_rec_hdr - : - &((struct devid_min_rec *) - this_nh->nmn_record)->min_rec_hdr); - else - this_rh = ((shared & NM_SHARED) ? - &((struct nm_shr_rec *) - this_nh->nmn_record)->sr_rec_hdr - : - &((struct nm_rec *)this_nh->nmn_record)->r_rec_hdr); - - /* - * Check for empty records and clean them up. - * For a MN diskset, only do this if master. - */ - if ((!multi_node) || - (multi_node && md_set[setno].s_am_i_master)) { - if (this_rh->r_used_size == overhead_size) { - mddb_setrecprivate(rh->r_next_recid, - MD_PRV_PENDDEL); - rh->r_next_recid = this_rh->r_next_recid; - kmem_free(this_nh, sizeof (*this_nh)); - nh->nmn_nextp = NULL; - mddb_setrecprivate(recid, MD_PRV_PENDCOM); - continue; - } - } - - private = mddb_getrecprivate(rh->r_next_recid); - mddb_setrecprivate(rh->r_next_recid, (private | MD_PRV_GOTIT)); - recid = rh->r_next_recid; - rh = this_rh; - nh = this_nh; - } -} - -static void -zero_data_ptrs(struct nm_next_hdr *nh, set_t setno) -{ - mdkey_t i; - struct nm_rec_hdr *rh = (struct nm_rec_hdr *)nh->nmn_record; - - if (rh->r_next_recid == 0) - return; - - for (i = MDDB_FIRST_MODID; i < rh->r_next_key; i++) - (void) setshared_data(setno, i, (caddr_t)-1); -} - -/* - * md_setdevname - Allows putting a device name into the database - */ -mdkey_t -md_setdevname( - set_t setno, /* specify which namespace to put in */ - side_t side, /* (key 1) side # */ - mdkey_t key, /* (key 2) KEYWILD - alloc key, else use key */ - char *drvnm, /* store this driver name with devicename */ - minor_t mnum, /* store this minor number as well */ - char *devname, /* device name to be stored */ - int imp_flag, /* used exclusively by import */ - ddi_devid_t imp_devid, /* used exclusively by import */ - char *imp_mname, /* used exclusively by import */ - set_t imp_setno, /* used exclusively by import */ - md_error_t *ep /* place to return error info */ -) -{ - struct nm_next_hdr *nh, *did_nh = NULL; - struct nm_name *n; - struct did_min_name *did_n; - struct did_min_name *new_did_n; - mddb_recid_t recids[3]; - char *cp, *dname = NULL, *fname; - char c; - mdkey_t retval = MD_KEYBAD; - int shared = -1, new = 0; - ddi_devid_t devid = NULL; - dev_t devt; - char *mname = NULL; - side_t thisside = MD_SIDEWILD; - lookup_dev_result_t lookup_res; - mdkey_t min_devid_key = MD_KEYWILD; - size_t min_len; - int use_devid = 0; - side_t temp_side; - - /* - * Don't allow addition of new names to namespace during upgrade. - */ - if (MD_UPGRADE) { - return (MD_KEYBAD); - } - - /* - * Make sure devname is not empty - */ - if (devname == (char *)NULL || strncmp(devname, "", 1) == 0) { - cmn_err(CE_WARN, "Unknown device with minor number of %d", - mnum); - return (MD_KEYBAD); - } - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (MD_KEYBAD); - } - - /* - * Go looking for an existing devid namespace record for this - * key. We need to do this here as md_devid_found() also - * requires the nm_lock.lock. - */ - if ((!imp_flag) && (setno == MD_LOCAL_SET) && (side > 0) && - (key != MD_KEYWILD)) { - /* - * We must be adding a namespace record for a disk in a - * shared metaset of some description. As we already have a - * key, walk all the valid sides for the set and see if we - * have a devid record present. This will be used to help - * determine which namespace we add this new record into. - */ - for (temp_side = 1; temp_side < MD_MAXSIDES; temp_side++) { - if (md_devid_found(setno, temp_side, key) == 0) { - /* - * We have a devid record for this key. - * Assume it's safe to use devid's for the - * other side records as well. - */ - use_devid = 1; - break; - } - } - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - /* - * Find out what namespace/set/side combination that is - * being dealt with. If this is not done then we stand a - * chance of adding in incorrect devid details to match - * the remote side's disk information. For example: - * disk c2t0d0s0 may have devt of 32,256 on this side - * but 32,567 on the remote side and if this is the case - * we do not want to add the devid for disk 32,567 on - * this side into the namespace. - */ - if (setno == MD_LOCAL_SET && side == 0) - /* local set/local side */ - thisside = side; - else if (setno == MD_LOCAL_SET && side > 0) { - /* - * local set/non-local side information ie a set record - * - * if the key is not set then this is the first time - * through this code which means this is the first record - * which then means the record to be added is for this node - */ - if (key == MD_KEYWILD) { - thisside = side; - } else { - /* - * This is not the first time through this code, - * so we have already got a record in the namespace. - * Check if the earlier search for this record found - * a devid record or not, and set the namespace - * accordingly. - */ - if (use_devid == 1) { - /* A devid record exists */ - shared = NM_DEVID | NM_NOTSHARED; - } else { - /* No devid record exists for this key */ - shared = NM_NOTSHARED; - } - } - } else if (setno != MD_LOCAL_SET) { - /* set record */ - thisside = mddb_getsidenum(setno); - } - - /* - * Check to see if it has a device id associated with - * and if the MDDB_DEVID_STYLE flag is set. If the device - * is a metadevice the get_minor_name will fail. No account - * of the side information is taken here because it is dealt - * with later on. - */ - if (!imp_flag) { - /* - * Only do this if we have not already set the namespace type, - * otherwise we run the risk of adding a record for an invalid - * minor number from a remote node. - */ - if (shared == -1) { - devt = makedevice(ddi_name_to_major(drvnm), mnum); - if ((ddi_lyr_get_devid(devt, &devid) == DDI_SUCCESS) && - (ddi_lyr_get_minor_name(devt, S_IFBLK, &mname) == - DDI_SUCCESS) && - (((mddb_set_t *)md_set[setno].s_db)->s_lbp->lb_flags - & MDDB_DEVID_STYLE)) - /* - * Reference the device id namespace - */ - shared = NM_DEVID | NM_NOTSHARED; - else - shared = NM_NOTSHARED; - } - } else { - /* Importing diskset has devids so store in namespace */ - devid = kmem_alloc(ddi_devid_sizeof(imp_devid), KM_SLEEP); - bcopy(imp_devid, devid, ddi_devid_sizeof(imp_devid)); - mname = md_strdup(imp_mname); - shared = NM_DEVID | NM_NOTSHARED; - } - - /* - * Always lookup the primary name space - */ - if ((nh = get_first_record(setno, 1, NM_NOTSHARED)) == NULL) { - retval = MD_KEYBAD; - goto out; - } - - /* - * If it has a device id then get the header for the devid namespace - */ - if (shared & NM_DEVID) { - if ((did_nh = get_first_record(setno, 1, shared)) == NULL) { - retval = MD_KEYBAD; - goto out; - } - } - - /* find boundary between filename and directory */ - cp = strrchr(devname, '/'); - - if (cp == NULL) { - /* No directory part to the name. */ - fname = devname; - dname = NULL; - } else { - /* Isolate the directory name only; save character after '/' */ - c = *(cp + 1); - *(cp + 1) = '\0'; - dname = md_strdup(devname); - - /* Restore character after '/' */ - *(cp + 1) = c; - fname = cp+1; - } - - /* - * If it already there in the name space - */ - lookup_res = lookup_deventry(nh, setno, side, key, drvnm, mnum, dname, - fname, &n); - - /* If we are importing the set */ - if (imp_flag && (lookup_res == LOOKUP_DEV_FOUND)) { - ushort_t did_sz; - ddi_devid_t did; - - /* - * We need to check for the case where there is a disk - * already in the namespace with a different ID from - * the one we want to add, but the same name. This is - * possible in the case of an unavailable disk. - */ - rw_exit(&nm_lock.lock); - if (md_getdevid(setno, side, n->n_key, NULL, &did_sz) != 0) - did_sz = 0; - rw_enter(&nm_lock.lock, RW_WRITER); - if (did_sz > 0) { - did = kmem_zalloc(did_sz, KM_SLEEP); - rw_exit(&nm_lock.lock); - (void) md_getdevid(setno, side, n->n_key, did, &did_sz); - rw_enter(&nm_lock.lock, RW_WRITER); - if (ddi_devid_compare(did, devid) == 0) { - kmem_free(did, did_sz); - retval = 0; - goto out; - } - kmem_free(did, did_sz); - } - /* - * This is not the same disk so we haven't really found it. - * Thus, we need to say it's "NOMATCH" and create a new - * entry. - */ - lookup_res = LOOKUP_DEV_NOMATCH; - } - switch (lookup_res) { - case LOOKUP_DEV_FOUND: - /* If we are importing the set */ - if (md_get_setstatus(imp_setno) & MD_SET_IMPORT) { - retval = 0; - goto out; - } - - /* Increment reference count */ - retval = n->n_key; - n->n_count++; - (void) update_entry(nh, n->n_side, n->n_key, 0L); - - /* Also in the device id name space if there is one */ - if (did_nh) { - /* - * Use thisside for the sideno as this is the - * side this is running on. - */ - if ((did_n = (struct did_min_name *) - lookup_entry(did_nh, setno, side, n->n_key, - NODEV64, NM_DEVID)) != NULL) { - - did_n->min_count++; - (void) update_entry(did_nh, did_n->min_side, - did_n->min_key, NM_DEVID); - } else { - /* - * If a disk device does not support - * devid then we would fail to find the - * device and then try and add it, bit - * silly. - */ - goto add_devid; - } - } - goto out; - - case LOOKUP_DEV_CONFLICT: - (void) mderror(ep, MDE_NAME_IN_USE); - retval = MD_KEYBAD; - goto out; - - case LOOKUP_DEV_NOMATCH: - /* Create a new name entry */ - new = 1; - n = (struct nm_name *)alloc_entry(nh, md_set[setno].s_nmid, - strlen(fname)+1, NM_NOTSHARED, &recids[0]); - - if (n == NULL) - goto out; - - n->n_minor = mnum; - n->n_side = side; - n->n_key = ((key == MD_KEYWILD) ? create_key(nh) : key); - n->n_count = 1; - - /* fill-in filename */ - (void) strcpy(n->n_name, fname); - n->n_namlen = (ushort_t)(strlen(fname) + 1); - - /* - * If MDE_DB_NOSPACE occurs - */ - if (((n->n_drv_key = - setshared_name(setno, drvnm, MD_KEYWILD, 0L)) == - MD_KEYBAD)) { - /* - * Remove entry allocated by alloc_entry - * and return MD_KEYBAD - */ - (void) remove_entry(nh, n->n_side, n->n_key, 0L); - goto out; - } - if (dname == NULL) { - /* No directory name implies no key. */ - n->n_dir_key = MD_KEYBAD; - } else { - /* We have a directory name to save. */ - if ((n->n_dir_key = - setshared_name(setno, dname, MD_KEYWILD, 0L)) == - MD_KEYBAD) { - /* - * Remove entry allocated by alloc_entry - * and return MD_KEYBAD - */ - (void) remove_entry(nh, n->n_side, n->n_key, - 0L); - goto out; - } - } - - recids[1] = md_set[setno].s_nmid; - recids[2] = 0; - mddb_commitrecs_wrapper(recids); - retval = n->n_key; - - /* - * Now to find out if devid's were used for thisside and if - * so what is the devid_key for the entry so that the correct - * minor name entry (did_n) has the correct devid key. - * Also get the minor name of the device, use the minor name - * on this side because the assumption is that the slices are - * going to be consistant across the nodes. - */ - if (key != MD_KEYWILD && (shared & NM_DEVID)) { - if ((did_n = (struct did_min_name *) - lookup_entry(did_nh, setno, thisside, n->n_key, - NODEV64, NM_DEVID)) == NULL) { - shared &= ~NM_DEVID; - } else { - min_devid_key = did_n->min_devid_key; - min_len = (size_t)did_n->min_namlen; - /* - * Need to save the min_name as well because - * if the alloc_entry() needs to expand the - * record then it will free the existing - * record (which will free any references - * to information within it ie did_n->min_name) - */ - if (mname != NULL) { - kmem_free(mname, strlen(mname) + 1); - } - mname = kmem_alloc(min_len, KM_SLEEP); - (void) strcpy(mname, did_n->min_name); - } - } else { - - /* - * It is possible for the minor name to be null, for - * example a metadevice which means the minor name is - * not initialised. - */ - if (mname == NULL) - goto out; - - min_len = strlen(mname) + 1; - } - break; - } - - /* - * We have the key and if the NM_DEVID bit is on - * use the key to add the device id into the device id name space - */ - -add_devid: - - if (shared & NM_DEVID) { - new_did_n = (struct did_min_name *)alloc_entry(did_nh, - md_set[setno].s_did_nmid, min_len, - shared, &recids[0]); - - /* - * No space - */ - if (new_did_n == NULL) { - if (new) { - (void) remove_entry(nh, n->n_side, n->n_key, 0L); - retval = MD_KEYBAD; - } - goto out; - } - - new_did_n->min_side = side; - new_did_n->min_key = n->n_key; - new_did_n->min_count = n->n_count; - - /* - * If the key is set then we know that there should - * be a corresponding devid entry because when the record - * associated with the key was created it would have created - * a corresponding devid entry, all we need to do is find - * that record and increment the count. - */ - if (key != MD_KEYWILD) { - - /* - * Need to copy the information from the original - * side (thisside). - */ - new_did_n->min_devid_key = min_devid_key; - min_devid_key = setshared_name(setno, - (char *)NULL, min_devid_key, NM_DEVID); - if (new_did_n->min_devid_key != min_devid_key) { - cmn_err(CE_NOTE, - "addname: failed to add to record"); - } - (void) strcpy(new_did_n->min_name, mname); - new_did_n->min_namlen = (ushort_t)min_len; - } else { - - /* use the did_n allocated above! */ - (void) strcpy(new_did_n->min_name, mname); - new_did_n->min_namlen = (ushort_t)(strlen(mname) + 1); - new_did_n->min_devid_key = setshared_name(setno, - (char *)devid, MD_KEYWILD, NM_DEVID); - } - /* - * If MDE_DB_NOSPACE occurs - */ - if (new_did_n->min_devid_key == MD_KEYBAD) { - /* - * Remove entry allocated by alloc_entry - */ - (void) remove_entry(did_nh, new_did_n->min_side, - new_did_n->min_key, NM_DEVID); - if (new) { - (void) remove_entry(nh, n->n_side, n->n_key, - 0L); - retval = MD_KEYBAD; - } - } else { - recids[1] = md_set[setno].s_did_nmid; - recids[2] = 0; - mddb_commitrecs_wrapper(recids); - } - } -out: - if (devid) { - ddi_devid_free(devid); - } - if (dname) - freestr(dname); - if (mname) - kmem_free(mname, strlen(mname) + 1); - rw_exit(&nm_lock.lock); - return (retval); -} - -/* - * md_get_invdid - return the invalid device id's - */ -int -md_get_invdid( - set_t setno, - side_t side, - int count, - int size, - void *ctdptr -) -{ - struct nm_next_hdr *did_shr_nh, *did_nh = NULL, *nh = NULL; - struct did_shr_name *did_shr_n; - struct did_min_name *did_n; - struct nm_name *n; - int key = MD_KEYWILD; - int cnt = 0; - char *cptr = (char *)ctdptr; - int i, dont_add_it; - char *tmpctd; - char *diskname; - char *tmpname; - - /* first get the invalid devid's from the loc block */ - if ((cnt = mddb_getinvlb_devid(setno, count, size, &cptr)) == -1) { - return (-1); - } - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - did_nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED); - if (did_nh == NULL) { - rw_exit(&nm_lock.lock); - return (0); - } - - did_shr_nh = get_first_record(setno, 1, NM_DEVID | NM_SHARED); - if (did_shr_nh == NULL) { - rw_exit(&nm_lock.lock); - return (0); - } - - nh = get_first_record(setno, 0, NM_NOTSHARED); - if (nh == NULL) { - rw_exit(&nm_lock.lock); - return (0); - } - while ((key = md_getnextkey(setno, side, key, NULL)) != MD_KEYWILD) { - dev_t devt; - ddi_devid_t rtn_devid = NULL; - int get_rc; - int compare_rc = 1; - - did_n = (struct did_min_name *)lookup_entry( - did_nh, setno, side, key, NODEV64, NM_DEVID); - if (did_n == NULL) { - continue; - } - did_shr_n = (struct did_shr_name *)lookup_shared_entry( - did_shr_nh, did_n->min_devid_key, (char *)0, - NULL, NM_DEVID); - if ((did_shr_n->did_data & NM_DEVID_VALID) != NULL) { - continue; - } - /* found invalid device id. Add to list */ - devt = md_dev64_to_dev( - md_getdevnum(setno, side, key, MD_TRUST_DEVT)); - get_rc = ddi_lyr_get_devid(devt, &rtn_devid); - if (get_rc == DDI_SUCCESS) { - compare_rc = ddi_devid_compare(rtn_devid, - (ddi_devid_t)did_shr_n-> did_devid); - ddi_devid_free(rtn_devid); - } - - if ((get_rc == DDI_SUCCESS) && (compare_rc == 0)) { - did_shr_n->did_data |= NM_DEVID_VALID; - } else { - if (cnt++ > count) { - rw_exit(&nm_lock.lock); - return (-1); - } - n = (struct nm_name *)lookup_entry( - nh, setno, side, key, NODEV64, 0L); - if (n == NULL) { - rw_exit(&nm_lock.lock); - return ((int)NODEV64); - } - tmpctd = ctdptr; - diskname = md_strdup(n->n_name); - if (strlen(diskname) > size) { - kmem_free(diskname, strlen(diskname) + 1); - rw_exit(&nm_lock.lock); - return (-1); - } - if ((tmpname = strrchr(diskname, 's')) != NULL) - *tmpname = '\0'; - dont_add_it = 0; - for (i = 0; i < (cnt - 1); i++) { - if (strcmp(diskname, tmpctd) == 0) { - dont_add_it = 1; - break; - } - tmpctd += size; - } - if (dont_add_it == 0) { - (void) strcpy(cptr, diskname); - cptr += size; - } - kmem_free(diskname, strlen(n->n_name) + 1); - } - } - *cptr = '\0'; - rw_exit(&nm_lock.lock); - return (0); -} -/* - * md_validate_devid - Checks the device id's to see if they're valid. - * Returns a count of the number of invalid device id's - */ -int -md_validate_devid( - set_t setno, - side_t side, - int *rmaxsz -) -{ - struct nm_next_hdr *did_shr_nh, *did_nh = NULL; - struct did_shr_name *did_shr_n; - struct did_min_name *did_n; - struct nm_name *n; - struct nm_next_hdr *nh = NULL; - int cnt = 0; - int key = MD_KEYWILD; - int maxsz = 0; - int len; - - /* - * do the locator blocks first... - */ - - if ((cnt = mddb_validate_lb(setno, &maxsz)) == -1) { - return (-1); - } - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (-1); - } - - rw_enter(&nm_lock.lock, RW_READER); - - did_nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED); - if (did_nh == NULL) { - rw_exit(&nm_lock.lock); - *rmaxsz = maxsz; - return (cnt); - } - - did_shr_nh = get_first_record(setno, 0, NM_DEVID | NM_SHARED); - if (did_shr_nh == NULL) { - rw_exit(&nm_lock.lock); - *rmaxsz = maxsz; - return (cnt); - } - - nh = get_first_record(setno, 0, NM_NOTSHARED); - if (nh == NULL) { - rw_exit(&nm_lock.lock); - *rmaxsz = maxsz; - return (cnt); - } - while ((key = md_getnextkey(setno, side, key, NULL)) != MD_KEYWILD) { - dev_t devt; - ddi_devid_t rtn_devid = NULL; - int get_rc; - int compare_rc = 1; - - did_n = (struct did_min_name *)lookup_entry( - did_nh, setno, side, key, NODEV64, NM_DEVID); - if (did_n == NULL) { - continue; - } - did_shr_n = (struct did_shr_name *)lookup_shared_entry( - did_shr_nh, did_n->min_devid_key, (char *)0, - NULL, NM_DEVID); - if ((did_shr_n->did_data & NM_DEVID_VALID) != 0) { - continue; - } - - devt = md_dev64_to_dev( - md_getdevnum(setno, side, key, MD_TRUST_DEVT)); - get_rc = ddi_lyr_get_devid(devt, &rtn_devid); - if (get_rc == DDI_SUCCESS) { - compare_rc = ddi_devid_compare(rtn_devid, - (ddi_devid_t)did_shr_n->did_devid); - ddi_devid_free(rtn_devid); - } - - if ((get_rc == DDI_SUCCESS) && (compare_rc == 0)) { - did_shr_n->did_data |= NM_DEVID_VALID; - } else { - /* device id is invalid */ - cnt++; - n = (struct nm_name *)lookup_entry( - nh, setno, side, key, NODEV64, 0L); - if (n == NULL) { - rw_exit(&nm_lock.lock); - return ((int)NODEV64); - } - /* update max size if necessary */ - len = (int)strlen(n->n_name); - if (maxsz < len) - maxsz = len; - } - } - rw_exit(&nm_lock.lock); - *rmaxsz = maxsz; - return (cnt); -} - -/* - * md_getdevname - * - * Wrapper for md_getdevname_common() - */ -int -md_getdevname( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - md_dev64_t dev, /* (alt. key 2) use this if key == KEYWILD */ - char *devname, /* char array to put device name in */ - size_t max_size /* size of char array */ -) -{ - return (md_getdevname_common(setno, side, key, dev, devname, - max_size, MD_WAIT_LOCK)); -} - -/* - * md_getdevname_common - * Allows getting a device name from the database. - * A pointer to a character array is passed in for - * the device name to be built in. Also the max_size - * is the maximum number of characters which can be put - * in the devname[]. - */ -int -md_getdevname_common( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - md_dev64_t dev, /* (alt. key 2) use this if key == KEYWILD */ - char *devname, /* char array to put device name in */ - size_t max_size, /* size of char array */ - int try_lock /* whether to spin on the namespace lock */ -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - int err; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - if (try_lock) { - if (rw_tryenter(&nm_lock.lock, RW_READER) == 0) { - /* Cannot obtain the lock without blocking */ - return (EAGAIN); - } - } else { - rw_enter(&nm_lock.lock, RW_READER); - } - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, - dev, 0L)) - == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - err = md_make_devname(n, setno, devname, max_size); - - rw_exit(&nm_lock.lock); - return (err); -} - -/* - * md_gethspinfo - Getting a hsp name or id from the database. - * A pointer to a character array is passed in for - * the hsp name to be built in. If a match is found, - * the corresponding hspid is stored in ret_hspid. - */ -int -md_gethspinfo( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - char *drvnm, /* return driver name here */ - hsp_t *ret_hspid, /* returned key if key is MD_KEYWILD */ - char *hspname /* alternate key or returned device name */ -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - char *drv_name; - int err = 0; - char *setname = NULL; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct nm_name *)lookup_hspentry(nh, setno, side, - key, hspname)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* Copy the driver name, device name and key for return */ - drv_name = (char *)getshared_name(setno, n->n_drv_key, 0L); - if (!drv_name || (strlen(drv_name) > MD_MAXDRVNM)) { - rw_exit(&nm_lock.lock); - return (EFAULT); - } - - /* - * Pre-friendly hsp names are of the form hspxxx and we - * should not have an entry in the namespace for them. - * So make sure the NM entry we get is a hotspare pool. - */ - if ((strcmp(drv_name, "md_hotspares")) != 0) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - (void) strcpy(drvnm, drv_name); - - /* - * If the input key is not MD_KEYWILD, return the - * hspname we found. - */ - if (key != MD_KEYWILD) { - setname = mddb_getsetname(setno); - if (setname != NULL) - (void) snprintf(hspname, MAXPATHLEN, - "%s/%s", setname, n->n_name); - else - (void) snprintf(hspname, MAXPATHLEN, - "%s", n->n_name); - } - - *ret_hspid = KEY_TO_HSP_ID(setno, n->n_key); - - rw_exit(&nm_lock.lock); - return (err); -} - -/* - * md_devid_found - Check to see if this key has devid entry or not - * Return 1 if there is one or 0 if none - */ -int -md_devid_found( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key /* key used to find entry in namespace */ -) -{ - struct nm_next_hdr *nh; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (0); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_DEVID| NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (0); - } - - /* - * Look up the key - */ - if (lookup_entry(nh, setno, side, key, NODEV64, NM_DEVID) == NULL) { - /* This key not in database */ - rw_exit(&nm_lock.lock); - return (0); - } - - rw_exit(&nm_lock.lock); - /* found a key */ - return (1); -} - - -/* - * md_getkeyfromdev - Allows getting a key from the database by using the dev. - * Returns the first key found and the number of keys - * found that match dev. - */ -int -md_getkeyfromdev( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - md_dev64_t dev, /* dev to match against */ - mdkey_t *firstkey, /* ptr for first key found */ - int *numkeysmatch /* ptr to number of keys matching dev */ -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - int keynum; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* - * Walk through all keys in the namespace looking for a match - * against the given dev. Count the number of matches and - * set firstkey to be first matched key. - */ - *numkeysmatch = 0; - for (keynum = 1; keynum < - ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key; keynum++) { - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, - keynum, dev, 0L)) == NULL) { - /* This key not in database */ - continue; - } else { - /* found a key, look for the dev match */ - if (dev == build_device_number(setno, - (struct nm_name *)n)) { - /* found a dev match */ - (*numkeysmatch)++; - if (*numkeysmatch == 1) { - *firstkey = n->n_key; - } - } - } - } - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_getnment - Allows getting a driver name and minor # from the database. - */ -int -md_getnment( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - md_dev64_t dev, - char *drvnm, /* char array to put driver name in */ - uint_t max_size, /* size of char array */ - major_t *major, /* address for major number */ - minor_t *mnum, /* address for minor number */ - mdkey_t *retkey /* address for returning key */ -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - char *drv_name; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, - dev, 0L)) - == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - drv_name = (char *)getshared_name(setno, n->n_drv_key, 0L); - if (!drv_name || (strlen(drv_name) > max_size)) { - rw_exit(&nm_lock.lock); - return (EFAULT); - } - - /* Copy the driver name, and fill in the minor number */ - (void) strcpy(drvnm, drv_name); - if (MD_UPGRADE) - *major = md_targ_name_to_major(drvnm); - else - *major = ddi_name_to_major(drvnm); - *mnum = n->n_minor; - *retkey = n->n_key; - - rw_exit(&nm_lock.lock); - - return (0); -} - -/* - * md_getdevnum - Allows getting a device number from the database. - * This routine returns a translated (aka miniroot) md_dev64_t. - */ -md_dev64_t -md_getdevnum( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - int flag) /* If set then return devt from namespace */ -{ - struct nm_next_hdr *nh, *did_shr_nh, *did_nh = NULL; - struct nm_name *n; - struct did_min_name *did_n; - struct did_shr_name *did_shr_n; - md_dev64_t retval, retval_targ; - int did_found = 0; - ddi_devid_t devid = NULL; - int ndevs; - dev_t *devs; - char *drv, *drvnm, *mname = NULL; - mddb_recid_t recids[3]; - int devid_nm = 0; - - /* - * If a MN diskset and this node is the master OR - * if a traditional diskset, then check to see if the - * did namespace should be cleaned up. - * - * Always set MD_SET_DIDCLUP bit in set's status field - * so that this check is only done once. - */ - if (!(md_get_setstatus(setno) & MD_SET_DIDCLUP)) { - if ((MD_MNSET_SETNO(setno) && (md_set[setno].s_am_i_master)) || - (!(MD_MNSET_SETNO(setno)))) { - if (!(((mddb_set_t *) - md_set[setno].s_db)->s_lbp->lb_flags - & MDDB_DEVID_STYLE) || md_devid_destroy) { - (void) md_load_namespace(setno, NULL, NM_DEVID); - (void) md_devid_cleanup(setno, 1); - } - } - md_set_setstatus(setno, MD_SET_DIDCLUP); - } - - /* - * Test the MDDB_DEVID_STYLE bit - */ - if (((mddb_set_t *)md_set[setno].s_db)->s_lbp->lb_flags - & MDDB_DEVID_STYLE) { - (void) md_load_namespace(setno, NULL, NM_DEVID); - devid_nm = 1; - } - - /* - * Load the primary name space - */ - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (NODEV64); - } - - rw_enter(&nm_lock.lock, RW_READER); - - - /* - * If not even in the primary name space, bail out - */ - if (((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) || - ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, - NODEV64, 0L)) == NULL)) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - /* - * Entry corresponds to this key is referenced and snarfed so - * we set the value to 1. During the name space cleanup we will check - * this value and if it is set then we know it is part of the - * current configuration. For any 'key' whose value is not set - * then we know it is an 'orphan' entry and will be removed. - */ - if (md_nm_snarfed) - md_nm_snarfed[key] = 1; - - /* - * Reference the device id namespace - */ - if (devid_nm) { - if (((did_nh = get_first_record(setno, 1, NM_DEVID | - NM_NOTSHARED)) == NULL) || ((did_shr_nh = - get_first_record(setno, 1, NM_DEVID | NM_SHARED)) - == NULL)) { - devid_nm = 0; - } - } - - /* - * If the key is in the device id name space then - * this device has disk tracking info stored - */ - if (devid_nm && ((did_n = (struct did_min_name *)lookup_entry(did_nh, - setno, side, key, NODEV64, NM_DEVID)) != NULL)) { - /* - * Get the minor name and the device id - */ - devid = (ddi_devid_t)getshared_name(setno, - did_n->min_devid_key, NM_DEVID); - - did_shr_n = (struct did_shr_name *)lookup_shared_entry( - did_shr_nh, did_n->min_devid_key, - (char *)0, NULL, NM_DEVID); - - if ((devid == NULL) || (did_shr_n == NULL)) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - - if (ddi_lyr_devid_to_devlist(devid, did_n->min_name, &ndevs, - &devs) == DDI_SUCCESS) { - - md_dev64_t tdev; - int cnt; - - did_found = 1; - - /* - * Save the first available devt - * During upgrade, this is a miniroot devt. - */ - - retval = md_expldev(devs[0]); - - /* - * For a multipath device more than 1 md_dev64_t will - * occur. In this case retval will be set to - * the md_dev64_t that was previously set. - */ - - if (ndevs > 1) { - - /* get the stored md_dev64_t */ - tdev = build_device_number(setno, n); - for (cnt = 0; cnt < ndevs; cnt++) { - if (tdev == md_expldev(devs[cnt])) { - retval = tdev; - break; - } - } - } - - /* - * If during upgrade, switch drvnm to be target - * device's name, not miniroot's name. - */ - if (MD_UPGRADE) - drvnm = md_targ_major_to_name(md_getmajor - (md_xlate_mini_2_targ(retval))); - else - drvnm = ddi_major_to_name( - md_getmajor(retval)); - - /* - * It is a valid device id - */ - did_shr_n->did_data = NM_DEVID_VALID; - - /* - * Free the memory - */ - (void) ddi_lyr_free_devlist(devs, ndevs); - } else { - /* - * Invalid device id, say so - * and check flag to see if we can return - * devt stored in the namespace - */ - did_shr_n->did_data = NM_DEVID_INVALID; - rw_exit(&nm_lock.lock); - - /* - * If flag does not have MD_TRUST_DEVT bit on - * then with the invalid device id we simply cant - * trust the devt in the namespace at all - * - * Bit MD_TRUST_DEVT is set by metadevadm or - * when a diskset is taken and it does not have - * any associated devid records for the drive - * records in the set. - * - * When this bit is set that means devt can be - * trusted and we just go ahead do whatever user - * ask for - */ - if (!(flag & MD_TRUST_DEVT)) - return (NODEV64); - - /* build_device_number returns a target devt */ - retval_targ = build_device_number(setno, n); - /* translate devt to miniroot devt */ - if ((retval = md_xlate_targ_2_mini(retval_targ)) - == NODEV64) { - return (NODEV64); - } - return (retval); - } - } - - - /* - * If no entry is found in the device id name space - * It can be one of: - * underlying meta device - * No device id associated - * Has a device id but mddb is in the old fromat - */ - if (did_found) { - /* - * Update the name entry if necessary - */ - if ((retval_targ = md_xlate_mini_2_targ(retval)) == NODEV64) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - if (n->n_minor != md_getminor(retval_targ)) - n->n_minor = md_getminor(retval_targ); - - if ((drv = - (char *)getshared_name(setno, n->n_drv_key, 0L)) == NULL) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - if (strcmp(drv, drvnm) != 0) - n->n_drv_key = setshared_name(setno, drvnm, - MD_KEYWILD, 0L); - - if (!(md_get_setstatus(setno) & MD_SET_STALE)) - (void) update_entry(nh, side, key, 0L); - } else { - /* - * Has a device id associated with it? - * If yes, then we will try to add them into the device id nm - * build_device_number returns a target devt. - */ - if ((retval_targ = build_device_number(setno, n)) == NODEV64) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - /* - * We don't translate the devt of the meta device - * and currently no device id associated with metadevice - */ - if (md_getmajor(retval_targ) != md_major_targ) { - - if ((retval = md_xlate_targ_2_mini(retval_targ)) - == NODEV64) { - rw_exit(&nm_lock.lock); - return (NODEV64); - } - - /* - * Add the device id info only if - * MDDB_DEVID_STYLE bit is set - * - */ - if (!devid_nm) { - rw_exit(&nm_lock.lock); - return (retval); - } - - /* - * We can continue if we are here - * If retval has a device id, add them - */ - if ((ddi_lyr_get_devid(md_dev64_to_dev(retval), &devid) - == DDI_SUCCESS) && - (ddi_lyr_get_minor_name(md_dev64_to_dev(retval), - S_IFBLK, &mname) - == DDI_SUCCESS)) { - /* - * Add them into the devid name space - */ - did_n = (struct did_min_name *)alloc_entry( - did_nh, md_set[setno].s_did_nmid, - strlen(mname)+1, NM_DEVID|NM_NOTSHARED, - &recids[0]); - - if (did_n) { - did_n->min_side = side; - did_n->min_key = key; - did_n->min_count = 1; - (void) strcpy(did_n->min_name, mname); - did_n->min_namlen = - (ushort_t)(strlen(mname)+1); - did_n->min_devid_key = - setshared_name(setno, - (char *)devid, MD_KEYWILD, - NM_DEVID); - /* - * Commit the change to the record - */ - if (did_n->min_devid_key == MD_KEYBAD) { - (void) remove_entry(did_nh, - did_n->min_side, - did_n->min_key, - NM_DEVID); - } else { - recids[1] = - md_set[setno].s_did_nmid; - recids[2] = 0; - mddb_commitrecs_wrapper(recids); - } - } - } - /* - * Free all the memory - */ - if (devid) - ddi_devid_free(devid); - if (mname) - kmem_free(mname, strlen(mname) + 1); - } else { - retval = md_makedevice(md_major, - md_getminor(retval_targ)); - } - } - - rw_exit(&nm_lock.lock); - return (retval); -} - -/* - * md_getnextkey - Allows running thru the list of defined device names. - */ -mdkey_t -md_getnextkey( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) wildcarded or from md_getnextkey() */ - uint_t *cnt) /* n_count returns here */ -{ - struct nm_next_hdr *nh; - struct nm_name *n = NULL; - mdkey_t retval = MD_KEYWILD; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (MD_KEYWILD); - } - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (MD_KEYWILD); - } - - for (key++; key < ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key; - key++) { - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, - NODEV64, 0L)) != NULL) - break; - } - - if (n != NULL) { - if (cnt != NULL) - *cnt = n->n_count; - - retval = n->n_key; - } - - rw_exit(&nm_lock.lock); - return (retval); -} - -/* - * md_update_namespace_did - update the devid portion of the namespace - */ -int -md_update_namespace_did( - set_t setno, - side_t side, - mdkey_t key, - md_error_t *ep -) -{ - dev_t devt; - ddi_devid_t rtn_devid = NULL; - ddi_devid_t devid = NULL; - struct nm_next_hdr *did_shr_nh; - mdkey_t ent_did_key; - uint32_t ent_did_count; - uint32_t ent_did_data; - struct did_shr_name *shn; - mddb_recid_t recids[3]; - struct nm_next_hdr *did_nh; - struct did_min_name *n; - struct did_shr_name *shr_n; - - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (!md_load_namespace(setno, NULL, 0L)) { - (void) md_unload_namespace(setno, NM_DEVID); - return ((int)NODEV64); - } - rw_enter(&nm_lock.lock, RW_WRITER); - - if ((did_nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) == - NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct did_min_name *)lookup_entry(did_nh, setno, side, key, - NODEV64, NM_DEVID)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - rw_exit(&nm_lock.lock); - devt = md_dev64_to_dev(md_getdevnum(setno, side, key, MD_TRUST_DEVT)); - - rw_enter(&nm_lock.lock, RW_WRITER); - if (ddi_lyr_get_devid(devt, &rtn_devid) == DDI_SUCCESS) { - did_shr_nh = get_first_record(setno, 0, NM_DEVID | NM_SHARED); - if (did_shr_nh == NULL) { - ddi_devid_free(rtn_devid); - rw_exit(&nm_lock.lock); - return ((int)NODEV64); - } - - shr_n = (struct did_shr_name *)lookup_shared_entry( - did_shr_nh, n->min_devid_key, (char *)0, - &recids[0], NM_DEVID); - if (shr_n == NULL) { - ddi_devid_free(rtn_devid); - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - devid = (ddi_devid_t)shr_n->did_devid; - if (ddi_devid_compare(rtn_devid, devid) != 0) { - /* remove old devid info */ - ent_did_key = shr_n->did_key; - ent_did_count = shr_n->did_count; - ent_did_data = shr_n->did_data; - (void) remove_shared_entry(did_shr_nh, - shr_n->did_key, NULL, NM_DEVID | - NM_KEY_RECYCLE); - - /* add in new devid info */ - if ((shn = (struct did_shr_name *)alloc_entry( - did_shr_nh, md_set[setno].s_did_nmid, - ddi_devid_sizeof(rtn_devid), - NM_DEVID | NM_SHARED | NM_NOCOMMIT, - &recids[0])) == NULL) { - ddi_devid_free(rtn_devid); - rw_exit(&nm_lock.lock); - return (ENOMEM); - } - shn->did_key = ent_did_key; - shn->did_count = ent_did_count; - ent_did_data |= NM_DEVID_VALID; - shn->did_data = ent_did_data; - shn->did_size = ddi_devid_sizeof(rtn_devid); - bcopy((void *)rtn_devid, (void *)shn->did_devid, - shn->did_size); - recids[1] = md_set[setno].s_nmid; - recids[2] = 0; - - mddb_commitrecs_wrapper(recids); - } - ddi_devid_free(rtn_devid); - } else { - rw_exit(&nm_lock.lock); - (void) mderror(ep, MDE_NODEVID); - return (ENOENT); - } - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_update_namespace - update namespace device name and pathname - * - */ - -int -md_update_namespace( - set_t setno, /* which set to get name from */ - side_t side, /* (key 1) side number */ - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - char *devname, /* device name */ - char *pathname, /* pathname to device */ - major_t major, /* major number */ - minor_t mnum /* minor numer */ -) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - struct nm_next_hdr *snh; - struct nm_shared_name *shn; - mddb_recid_t recids[3]; - mdkey_t ent_key, ent_drv_key, ent_dir_key; - uint32_t ent_count; - side_t ent_side; - char *old_pathname, *old_drvnm; - char *drvnm; - - if (!md_load_namespace(setno, NULL, 0L)) { - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL || - (snh = get_first_record(setno, 0, NM_SHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, NODEV64, - 0L)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* Save the values from the old record */ - ent_side = n->n_side; - ent_key = n->n_key; - ent_count = n->n_count; - - /* - * These can be overwritten - */ - ent_drv_key = n->n_drv_key; - ent_dir_key = n->n_dir_key; - - /* - * Now can safely remove the entry - * If the entry is there it will be removed, - * otherwise nothing will happen to mddb - */ - (void) remove_entry(nh, n->n_side, n->n_key, 0L | NM_KEY_RECYCLE); - - rw_exit(&nm_lock.lock); - /* The old path and drvnm has to be there */ - old_pathname = md_getshared_name(setno, ent_dir_key); - old_drvnm = md_getshared_name(setno, ent_drv_key); - if (!old_pathname || !old_drvnm) { - return (ENOENT); - } - - /* Check to see if we have a new pathname */ - if (strcmp(old_pathname, pathname)) { - /* now see if the new pathname actually exists in our nsp */ - shn = (struct nm_shared_name *)lookup_shared_entry( - snh, NULL, pathname, &recids[0], 0L); - if (shn) { - /* pathname exists so get it's key */ - ent_dir_key = shn->sn_key; - } else { - /* pathname doesn't exist so create it */ - ent_dir_key = - md_setshared_name(setno, pathname, NM_NOCOMMIT); - } - } - - /* - * Check and update n_drv_key as well since we can't - * blindly use the old one for the new drvnm - */ - drvnm = ddi_major_to_name(major); - if (strcmp(old_drvnm, drvnm)) { - shn = (struct nm_shared_name *)lookup_shared_entry( - snh, NULL, drvnm, &recids[0], 0L); - if (shn) { - ent_drv_key = shn->sn_key; - } else { - ent_drv_key = - md_setshared_name(setno, drvnm, NM_NOCOMMIT); - } - } - - rw_enter(&nm_lock.lock, RW_WRITER); - /* Create a name entry */ - n = (struct nm_name *)alloc_entry(nh, md_set[setno].s_nmid, - strlen(devname)+1, NM_NOTSHARED | NM_NOCOMMIT, &recids[0]); - - if (n == NULL) { - rw_exit(&nm_lock.lock); - return (ENOMEM); - } - - n->n_minor = mnum; - n->n_side = ent_side; - n->n_key = ent_key; - n->n_count = ent_count; - n->n_drv_key = ent_drv_key; - n->n_dir_key = ent_dir_key; - - /* fill-in filename */ - (void) strcpy(n->n_name, devname); - n->n_namlen = (ushort_t)(strlen(devname) + 1); - - recids[1] = md_set[setno].s_nmid; - recids[2] = 0; - - mddb_commitrecs_wrapper(recids); - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_getdevidminor - Get the minor name from the database. The minor - * name and the devid id uniquely identify the disk - * slice. - */ -int -md_getdevidminor( - set_t setno, - side_t side, - mdkey_t key, - char *minorname, - size_t max_size -) -{ - struct nm_next_hdr *nh; - struct did_min_name *n; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - /* - * The key we have is for the non-shared, regular namespace. We - * have to lookup the min_key in the non-shared, devid namespace. - */ - if ((nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) - == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct did_min_name *)lookup_entry(nh, setno, side, key, - NODEV64, NM_DEVID)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if (n->min_namlen > max_size) { - rw_exit(&nm_lock.lock); - return (EFAULT); - } - - bcopy(&((struct did_min_name *)n)->min_name[0], minorname, - n->min_namlen); - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_getdevid - Allows getting a device id from the database. - * A pointer to a character array is passed in for - * the device id to be copied to. The size is returned - * in *did_size. - */ -int -md_getdevid( - set_t setno, /* which set to get name from */ - side_t side, - mdkey_t key, /* (key 2) key provided by md_setdevname() */ - ddi_devid_t did, /* pointer to did string */ - ushort_t *did_size /* pointer to size of did string */ -) -{ - struct nm_next_hdr *nh; - void *n; - mddb_recid_t recid; - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_READER); - - /* - * The key we have is for the non-shared, regular namespace. We - * have to lookup the min_key in the non-shared, devid namespace. - */ - if ((nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) - == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct did_min_name *)lookup_entry(nh, setno, side, key, - NODEV64, NM_DEVID)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* - * Now go get the devid. - */ - if ((nh = get_first_record(setno, 0, NM_DEVID | NM_SHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if ((n = (struct did_shr_name *)lookup_shared_entry(nh, - ((struct did_min_name *)n)->min_devid_key, (char *)0, &recid, - NM_DEVID)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* - * If did is non-zero then copy devid to buffer, else return - * devid size to user. These are exclusive operations. - */ - if (did != NULL) { - bcopy(&((struct did_shr_name *)n)->did_devid[0], did, - *did_size); - } else { - *did_size = ((struct did_shr_name *)n)->did_size; - } - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_remdevname - Allows removing a device name from the database. - */ -int -md_remdevname( - set_t setno, - side_t side, - mdkey_t key -) -{ - struct nm_next_hdr *nh, *did_nh; - struct nm_next_hdr *shared_nh, *did_shr_nh; - struct nm_name *n; - struct did_min_name *did_n = NULL; - mdkey_t drv_key, dir_key, did_key; - int err; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - if (((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) || - ((shared_nh = get_first_record(setno, 0, NM_SHARED)) == NULL)) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* - * If it is not in the primary name space, nothing to remove - */ - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, NODEV64, - 0L)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - /* - * If there is non-empty device id name space - * Try to locate the entry - */ - if (md_set[setno].s_did_nm && - ((did_nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) - != NULL) && - ((did_shr_nh = get_first_record(setno, 0, NM_DEVID | NM_SHARED)) - != NULL)) { - did_n = (struct did_min_name *)lookup_entry(did_nh, setno, - side, key, NODEV64, NM_DEVID); - } - - n->n_count--; - if (n->n_count) { - - err = update_entry(nh, side, key, 0L); - /* - * Update the device id namespace as well - */ - if (did_n) { - did_n->min_count--; - (void) update_entry(did_nh, side, key, NM_DEVID); - } - - rw_exit(&nm_lock.lock); - return (err); - } - - /* reference count is zero, actually remove the name entry */ - drv_key = n->n_drv_key; - dir_key = n->n_dir_key; - did_key = (did_n ? did_n->min_devid_key : 0); - - if (remove_entry(nh, side, key, 0L)) { - rw_exit(&nm_lock.lock); - return (EINVAL); - } - - if (remove_shared_entry(shared_nh, drv_key, (char *)0, 0L) || - remove_shared_entry(shared_nh, dir_key, (char *)0, 0L)) { - rw_exit(&nm_lock.lock); - return (EINVAL); - } - - /* - * Remove from the device id name space - */ - if (did_n) { - if (remove_entry(did_nh, side, key, NM_DEVID)) { - rw_exit(&nm_lock.lock); - return (EINVAL); - } - - if (remove_shared_entry(did_shr_nh, did_key, (char *)0, - NM_DEVID)) { - rw_exit(&nm_lock.lock); - return (EINVAL); - } - } - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_setshared_name - Puts a name into the shared namespace database, and - * returns a key (used to get the string back). - * If the name does not already exist in the namespace - * then it will be added and the reference count will - * be set to one; - * Otherwise the reference count is incremented. - */ -mdkey_t -md_setshared_name(set_t setno, char *shrname, int nocommit) -{ - mdkey_t key; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (MD_KEYBAD); - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - key = setshared_name(setno, shrname, MD_KEYWILD, nocommit); - - rw_exit(&nm_lock.lock); - return (key); -} - - -/* - * md_getshared_name - Allows converting a key, into the shared namespace - * database, to the string which it represents. - */ -char * -md_getshared_name(set_t setno, mdkey_t shrkey) -{ - char *string; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return ((char *)0); - } - - rw_enter(&nm_lock.lock, RW_READER); - string = (char *)getshared_name(setno, shrkey, 0L); - rw_exit(&nm_lock.lock); - - return (string); -} - -/* - * md_remshared_name - Allows removing of shared name by key. - */ -int -md_remshared_name(set_t setno, mdkey_t shrkey) -{ - struct nm_next_hdr *nh; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (ENOENT); - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - if ((nh = get_first_record(setno, 0, NM_SHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - if (remove_shared_entry(nh, shrkey, (char *)0, 0L)) { - rw_exit(&nm_lock.lock); - return (ENOENT); - } - - rw_exit(&nm_lock.lock); - return (0); -} - -/* - * md_getshared_key - get the key for the given string. - */ -mdkey_t -md_getshared_key(set_t setno, char *shrname) -{ - mdkey_t retval; - - - /* - * Load the devid name space if it exists - */ - (void) md_load_namespace(setno, NULL, NM_DEVID); - if (! md_load_namespace(setno, NULL, 0L)) { - /* - * Unload the devid namespace - */ - (void) md_unload_namespace(setno, NM_DEVID); - return (MD_KEYBAD); - } - - rw_enter(&nm_lock.lock, RW_WRITER); - retval = getshared_key(setno, shrname, 0L); - rw_exit(&nm_lock.lock); - return (retval); -} - -/* - * md_load_namespace - Get all the records associated with the namespace - * out of the database and setup all the incore - * structures (i.e., pointers). - */ -int -md_load_namespace(set_t setno, md_error_t *ep, int devid_nm) -{ - mddb_recid_t hdr_recid; - struct nm_header_hdr *hdr = NULL; - mddb_type_t rec_type; - - if ((md_get_setstatus(setno) & MD_SET_NM_LOADED)) - return (1); - - if (devid_nm && (md_set[setno].s_did_nm != 0)) - return (1); - - rec_type = (devid_nm ? MDDB_DID_NM_HDR : MDDB_NM_HDR); - - hdr_recid = mddb_getnextrec(mddb_makerecid(setno, 0), rec_type, 0); - - if (hdr_recid < 0) { - if (ep != NULL) - return (mddbstatus2error(ep, hdr_recid, NODEV32, - setno)); - return (0); - } - - if (hdr_recid != 0) { - mddb_recstatus_t status; - - status = mddb_getrecstatus(hdr_recid); - if (status == MDDB_NODATA) { - mddb_setrecprivate(hdr_recid, MD_PRV_PENDDEL); - hdr_recid = 0; - } else if (status == MDDB_STALE) { - if (! (md_get_setstatus(setno) & MD_SET_STALE)) { - md_set_setstatus(setno, MD_SET_STALE); - cmn_err(CE_WARN, "md: state database is stale"); - } - } - } - - rw_enter(&nm_lock.lock, RW_WRITER); - - if (hdr_recid != 0) { - - hdr = kmem_zalloc(sizeof (*hdr), KM_SLEEP); - ASSERT(hdr != NULL); - - if (devid_nm) { - md_set[setno].s_did_nmid = hdr_recid; - md_set[setno].s_did_nm = (void *)hdr; - } else { - md_set[setno].s_nmid = hdr_recid; - md_set[setno].s_nm = (void *)hdr; - } - - hdr->hh_header = (struct nm_header *)mddb_getrecaddr(hdr_recid); - - ASSERT(hdr->hh_header != NULL); - - hdr->hh_names.nmn_record = &(hdr->hh_header->h_names); - hdr->hh_shared.nmn_record = &(hdr->hh_header->h_shared); - - mddb_setrecprivate(hdr_recid, MD_PRV_GOTIT); - - build_rec_hdr_list(&hdr->hh_names, hdr_recid, - devid_nm | NM_NOTSHARED); - build_rec_hdr_list(&hdr->hh_shared, hdr_recid, - devid_nm | NM_SHARED); - - /* - * Only cleanup a MN diskset if this node is master. - * Always cleanup traditional diskset. - */ - if (!(MD_MNSET_SETNO(setno)) || - (MD_MNSET_SETNO(setno) && md_set[setno].s_am_i_master)) { - if (devid_nm) { - cleanup_unused_rec(setno, NM_DEVID); - } else { - cleanup_unused_rec(setno, 0L); - } - } - } - - if (!devid_nm) - md_set_setstatus(setno, MD_SET_NM_LOADED); - if (hdr && hdr->hh_header != NULL) - zero_data_ptrs(&hdr->hh_shared, setno); - rw_exit(&nm_lock.lock); - return (1); -} - -void -md_unload_namespace(set_t setno, int devid_nm) -{ - struct nm_header_hdr *hhdr; - struct nm_next_hdr *nh, *nnh; - - if (!devid_nm && (md_set[setno].s_nmid == 0)) - return; - - if (devid_nm && (md_set[setno].s_did_nmid == 0)) - return; - - rw_enter(&nm_lock.lock, RW_WRITER); - - hhdr = ((devid_nm & NM_DEVID) ? - (struct nm_header_hdr *)md_set[setno].s_did_nm : - (struct nm_header_hdr *)md_set[setno].s_nm); - - if (devid_nm) { - md_set[setno].s_did_nmid = 0; - md_set[setno].s_did_nm = NULL; - } else { - md_set[setno].s_nmid = 0; - md_set[setno].s_nm = NULL; - } - - /* - * Clear MD_SET_NM_LOADED when the primary is unloaded - */ - if (!devid_nm) - md_clr_setstatus(setno, MD_SET_NM_LOADED); - - rw_exit(&nm_lock.lock); - - /* - * Free the memory occupied by the namespace records if any has been - * allocated. For the case of a namespace which contains drives not - * supporting device id's we must be careful. - */ - if (hhdr != NULL) { - for (nh = hhdr->hh_names.nmn_nextp; nh; nh = nnh) { - nnh = nh->nmn_nextp; - kmem_free(nh, sizeof (*nh)); - } - - for (nh = hhdr->hh_shared.nmn_nextp; nh; nh = nnh) { - nnh = nh->nmn_nextp; - kmem_free(nh, sizeof (*nh)); - } - kmem_free(hhdr, sizeof (*hhdr)); - } -} - -/* - * md_nm_did_chkspace - calculate the approximate DID namespace size based - * on the component disk devices defined in the primary - * non-shared namespace for this set. This is done on - * the conservative side and may be a block or two too - * large. These are MDDB blocks. - * - * This is intended to be called during a replica conversion from non-devid - * format to devid format. As such no special precautions were taken to - * insure reentrancy. In particular the code in free_devid_list() that - * initializes the devid_list anchor linkages makes this function non-MT-safe. - */ - -int -md_nm_did_chkspace(set_t setno) -{ - struct nm_next_hdr *nh; - struct nm_name *n; - side_t side = MD_SIDEWILD; - mdkey_t key = MD_KEYWILD; - int total_size = 0; /* Total required size */ - int devid_size = 0; /* Device id total size */ - int mname_size = 0; /* Minor name total size */ - int namelen = 0; - int comp_count = 0; /* Total number of components */ - int devid_count = 0; /* Total number of devids */ - ddi_devid_t devid = NULL; - char *mname = NULL; - - rw_enter(&nm_lock.lock, RW_READER); - - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - rw_exit(&nm_lock.lock); - return (total_size); - } - - /* - * For each key in the non-shared, primary namespace, lookup the - * minor name and any associated device id. These will reside in - * the device id namespace of the upgraded system. - */ - while ((key = md_getnextkey(setno, side, key, NULL)) != MD_KEYWILD) { - if ((n = (struct nm_name *)lookup_entry(nh, setno, side, key, - NODEV64, 0L)) == NULL) { - break; - } else { - md_dev64_t dev64 = build_device_number(setno, n); - dev_t dev = md_dev64_to_dev(dev64); - - if (ddi_lyr_get_minor_name(dev, S_IFBLK, &mname) - != DDI_SUCCESS) { - continue; - } else { - if (mname) { - namelen = strlen(mname); - mname_size += namelen; - kmem_free(mname, (namelen + 1)); - comp_count++; - } - } - if (ddi_lyr_get_devid(dev, &devid) != DDI_SUCCESS) { - continue; - } else { - if (devid_is_unique(devid)) { - add_to_devid_list(devid); - } else { - ddi_devid_free(devid); - } - } - } - } - - devid_size = free_devid_list(&devid_count); - rw_exit(&nm_lock.lock); - - /* - * Sum things up in this order: - * 1) # blocks to hold devid non-shared record blocks - * 2) # blocks to hold devid shared record blocks - * 3) 1 block to hold devid non-shared nm_rec_hdr's - * 4) 1 block to hold mddb_de's for both of these spaces - */ - - /* - * 1) - */ - total_size = roundup(sizeof (struct mddb_rb32) + - sizeof (struct nm_rec_hdr) + (sizeof (struct did_min_name) * - comp_count) + (mname_size + comp_count), MDDB_BSIZE); - - /* - * 2) - */ - total_size += roundup(sizeof (struct mddb_rb32) + - sizeof (struct nm_rec_hdr) + (sizeof (struct did_shr_name) * - devid_count) + devid_size, MDDB_BSIZE); - - /* - * 3) and 4) - */ - total_size += (2 * MDDB_BSIZE); - - return (total_size/MDDB_BSIZE); -} - -/* - * devid_list - forward list of devid_list structs. - * Managed by routines add_to_devid_list() and free_devid_list() to keep - * track of unique devids associated with components of metadevices. Entries - * are made at the beginning of the list. - */ -static struct devid_list { - size_t devid_size; - struct devid_list *next; - ddi_devid_t devid; -} did_list = { 0, NULL, NULL}; - -static struct devid_list *dlp = &did_list; - -/* - * add_to_devid_list - add a struct devid_list to the head of the devid_list - * list. - */ -static void -add_to_devid_list(ddi_devid_t did) -{ - struct devid_list *curdlp; - - curdlp = kmem_zalloc(sizeof (struct devid_list), KM_SLEEP); - curdlp->devid_size = ddi_devid_sizeof(did); - curdlp->devid = did; - curdlp->next = dlp->next; - dlp->next = curdlp; -} - -/* - * free_devid_list - free storage allocated to dev_list list. Return number - * of entries on list at address supplied by argument count. Return total - * size of all device ids that were on the list. - */ -static size_t -free_devid_list(int *count) -{ - struct devid_list *curdlp; - struct devid_list *nextlp; - size_t total_size = 0; - int n = 0; - - /* - * If there's nothing on the list. - */ - if ((curdlp = dlp->next) == NULL) { - *count = 0; - return (total_size); - } - - while (curdlp) { - nextlp = curdlp->next; - total_size += curdlp->devid_size; - (void) ddi_devid_free(curdlp->devid); - kmem_free(curdlp, sizeof (struct devid_list)); - curdlp = nextlp; - n++; - } - - /* - * Insure that the devid_list anchor linkages are reinitialized in - * case of multiple calls (eg during testsuite execution). - */ - dlp->next = NULL; - dlp->devid = NULL; - - *count = n; - return (total_size); -} - -/* - * devid_is_unique - search for did on devid_list list. Return "false" if - * found. - */ -static int -devid_is_unique(ddi_devid_t did) -{ - struct devid_list *curdlp; - int unique = 1; /* Default to true */ - - /* - * If first call. - */ - if ((curdlp = dlp->next) == NULL) { - return (1); - } - - while (curdlp) { - if (ddi_devid_compare(curdlp->devid, did) == 0) { - unique = 0; - break; - } - curdlp = curdlp->next; - } - return (unique); -} - - -/* - * Called after the unit's snarf to cleanup the device id name space - */ -void -md_devid_cleanup(set_t setno, uint_t all) -{ - struct nm_next_hdr *nh, *did_nh, *this_nh, *did_shr_nh; - struct did_min_name *did_n; - size_t offset, n_offset; - struct devid_min_rec *record; - mdkey_t did_key; - size_t n_size; - int doit; - - /* - * If it is an empty name space - */ - if (((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) || - ((did_nh = get_first_record(setno, 1, NM_DEVID | NM_NOTSHARED)) - == NULL) || - ((did_shr_nh = get_first_record(setno, 1, NM_DEVID | - NM_SHARED)) == NULL)) { - return; - } - - /* - * Or the name space is empty - */ - this_nh = did_nh->nmn_nextp; - record = this_nh->nmn_record; - - if (((struct nm_rec_hdr *)record)->r_used_size == - sizeof (struct nm_rec_hdr)) { - return; - } - - /* - * Not empty - */ - n_offset = offset = (sizeof (struct devid_min_rec) - - sizeof (struct did_min_name)); - did_n = &(record->minor_name[0]); - - /*CONSTCOND*/ - while (1) { - did_key = did_n->min_devid_key; - n_size = DID_NAMSIZ((struct did_min_name *)did_n); - - /* - * It is not in the primary, remove it from the devid nmspace - */ - doit = (all ? 1 : - (lookup_entry(nh, setno, MD_SIDEWILD, did_n->min_key, - NODEV64, 0L) == NULL)); - if (doit) { - (void) remove_entry(did_nh, did_n->min_side, - did_n->min_key, NM_DEVID); - (void) remove_shared_entry(did_shr_nh, did_key, - (char *)0, NM_DEVID); - /* - * We delete something so reset scan - */ - offset = n_offset; - did_n = &(record->minor_name[0]); - if (did_n->min_key != NULL) { - continue; - } else { - return; - } - } - - did_n = (struct did_min_name *)get_next_entry(this_nh, - (caddr_t)did_n, n_size, &offset); - - /* - * Next record? - */ - if (did_n == NULL) { - if (offset) - return; - /* - * Goto next record - */ - offset = n_offset; - this_nh = this_nh->nmn_nextp; - record = this_nh->nmn_record; - did_n = &(record->minor_name[0]); - } - } - /*NOTREACHED*/ -} - - -/* - * Resolve md_dev64_t by device id when current configure changes. This - * can happen before the system reboot or between snarf - * and the first use of metadevice. The configure change can - * mean poweroff before boot and poweron after boot or recable - * disks between snarf and the first open of metadevice. - */ -md_dev64_t -md_resolve_bydevid(minor_t mnum, md_dev64_t device, mdkey_t key) -{ - - struct nm_name *n; - struct nm_next_hdr *nh, *did_nh; - struct did_min_name *did_n; - ddi_devid_t devid; - dev_t *devs; /* ddi returns dev_t not md_dev64_t */ - int ndevs, - cnt; - set_t setno; - int update = 0; - md_dev64_t targ_dev; - - /* assign here so that lint does not complain */ - targ_dev = NODEV64; - - if (device != NODEV64 && (md_getmajor(device) == md_major)) - return (device); - - setno = MD_MIN2SET(mnum); - - if (((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) || - ((n = (struct nm_name *)lookup_entry(nh, setno, MD_SIDEWILD, - key, NODEV64, 0L)) == NULL)) { - return (NODEV64); - } - - /* - * Something can be resolved by device id - * Resolve by the device id and if it can't be resolved - * then return whatever passed in - */ - if (((did_nh = get_first_record(setno, 0, NM_DEVID | NM_NOTSHARED)) - != NULL) && ((did_n = (struct did_min_name *)lookup_entry - (did_nh, setno, MD_SIDEWILD, key, NODEV64, NM_DEVID)) - != NULL)) { - /* - * Get the current devt and update mddb devt if necessary - */ - devid = (ddi_devid_t)getshared_name(setno, - did_n->min_devid_key, NM_DEVID); - - if (devid && (ddi_lyr_devid_to_devlist(devid, did_n->min_name, - &ndevs, &devs) == DDI_SUCCESS)) { - - /* - * This device has been powered off - */ - if (device == NODEV64) { - device = md_expldev(devs[0]); - update = 1; - } else { - for (cnt = 0; cnt < ndevs; cnt++) { - if (device == md_expldev(devs[cnt])) - break; - } - if (cnt == ndevs) { - device = md_expldev(devs[0]); - update = 1; - } - } - - /* - * Have devt so update name space also - */ - targ_dev = md_xlate_mini_2_targ(device); - if (targ_dev == NODEV64) - return (NODEV64); - - if (update && - !(md_get_setstatus(setno) & MD_SET_STALE)) { - n->n_minor = md_getminor(targ_dev); - /* - * If we have the key for the driver get - * it and update the entry. If it's not there - * we need to create it. - */ - if ((n->n_drv_key = getshared_key(setno, - md_targ_major_to_name( - md_getmajor(targ_dev)), 0L)) == MD_KEYBAD) { - n->n_drv_key = setshared_name(setno, - md_targ_major_to_name( - md_getmajor(targ_dev)), - MD_KEYWILD, 0L); - } - (void) update_entry(nh, MD_SIDEWILD, - n->n_key, 0L); - } - /* - * Free memory - */ - (void) ddi_lyr_free_devlist(devs, ndevs); - } else { - /* - * if input devid is null or ddi_devid_lyr_devlist - * does not return success then return NODEV64 - */ - device = NODEV64; - } - } - return (device); -} diff --git a/usr/src/uts/common/io/lvm/md/md_rename.c b/usr/src/uts/common/io/lvm/md/md_rename.c deleted file mode 100644 index ef7f1a733f5c..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_rename.c +++ /dev/null @@ -1,1872 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - - -/* - * rename or exchange identities of virtual device nodes - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -extern major_t md_major; -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -#define ROLE(r) \ - ((r) == MDRR_PARENT? "parent": \ - (r) == MDRR_SELF? "self": \ - (r) == MDRR_CHILD? "child": \ - (r) == MDRR_UNK? "": "") - -#define OP_STR(op) \ - (((op) == MDRNOP_UNK)? "" : \ - ((op) == MDRNOP_RENAME)? "rename" : \ - ((op) == MDRNOP_EXCHANGE)? "exchange" : \ - "") -int md_rename_debug = 0; - -/* delta guard rails */ -const unsigned long long DELTA_BEG = (0xDad08888a110beefull); -const unsigned long long DELTA_END = (0xa110Beef88880Dadull); - -const unsigned long long DELTA_BEG_FREED = (0xBad0c0ed0fed0dadull); -const unsigned long long DELTA_END_FREED = (0x0Fed0dadbad0c0edull); - -/* transaction guard rails */ -const unsigned long long TXN_BEG = (0xDad01eadc0ed2badull); -const unsigned long long TXN_END = (0xc0ed2badDad01eadull); - -const unsigned long long TXNUN_BEG = (0xcafe0fedbad0beefull); -const unsigned long long TXNUN_END = (0xbad0beefcafe0fedull); - -const unsigned int guard_shift = (sizeof (u_longlong_t) - 3); -const md_stackcap_t MD_CAN_DO_ANYTHING = (md_stackcap_t)0; - -typedef struct role_change_mapping_tab_t { - const int ord; - const md_renrole_t old_role; - const md_renrole_t new_role; - const char *svc_name; - md_ren_roleswap_svc_t * const default_svc; -} role_change_tab_t; - -/* - * The actual table is at the end of the file, so we don't need - * many forward references - */ -static role_change_tab_t role_swap_tab[]; - -#define ILLEGAL_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(0xA1100BAD)) -#define NO_DEFAULT_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(NULL)) -#define ILLEGAL_SVC_NAME (NULL) - -/* - * - * Role swap rule table: - * - * New Role - * +---------------------------------------------------------------| - * | | Parent | Self | Child | - * +--------+-----------------+----------------+-------------------+ - * | Parent | no default | ...no default | illegal | - * | | 1 (update kids) | 2 (update to) | 3 | - * Old +--------+-----------------+----------------+-------------------+ - * Role | Self | ...self update | ...rename self | no default (down | - * | | 4 update up | 5 | 6 update from) | - * +--------+-----------------+----------------+-------------------+ - * | Child | illegal | ...child | ...update | - * | | 7 | 8 update to | 9 parent | - * +---------------------------------------------------------------+ - * - * and notes: - * - * - Boxes 1, 4 and 6 are the most interesting. They are responsible - * for updating the from unit's data structures. These may involve - * finding (former or future) children, resetting name keys and the like. - * - * - The "rename" operation is boxes 1, 5 and 9. Most of the work - * is done in box 5, since that contains both the "from" and "to" - * unit struct for rename. - * - * (There's got to be an eigen function for this; that diagonal - * axis is a role identity operation searching for an expression.) - * - * - Almost every transaction will call more than one of these. - * (Only a rename of a unit with no relatives will only call - * a single box.) - * - * - Box 4 "...update from" is the generic self->parent modifier. - * - Box 8 "...update to" is the generic child->self modifier. - * These can be generic because all of the information which - * needs to be updated is in the common portion of the unit - * structure when changing from their respective roles. - * - * - Boxes 1, 2 and 6 ("no default") indicate that per-metadevice - * information must be updated. For example, in box 1, children - * identities must be updated. Since different metadevice types - * detect and manipulate their children differently, there can - * be no generic "md_rename" function in this box. - * - * In addition to the named services in the table above, there - * are other named services used by rename/exchange. - * MDRNM_LIST_URFOLKS, MDRNM_LIST_URSELF, MDRNM_LIST_URKIDS - * list a device's parents, self and children, respectively. - * In most cases the default functions can be used for parents - * and self. Top-level devices, are not required to have a - * "list folks" named service. Likewise, devices which can - * not have metadevice children, are not required to have the - * "list kids" named service. The LIST_UR* functions call back into - * the base driver (md_build_rendelta()) to package the changes to - * a device for addition onto the tree. The LIST_UR* named service - * then adds this "rename delta" onto the delta tree itself. - * This keeps private knowledge appropriately encapsulated. - * They return the number of devices which will need to be changed, - * and hence the number of elements they've added to the delta list - * or -1 for error. - * - * Other named services used by rename/exchange are: - * "lock" (MDRNM_LOCK), "unlock" (MDRNM_UNLOCK) and "check" (MDRNM_CHECK). - * These (un) write-lock all of the relevant in-core structs, - * including the unit structs for the device and quiesce i/o as necessary. - * The "check" named service verifies that this device - * is in a state where rename could and may occur at this time. - * Since the role_swap functions themselves cannot be undone - * (at least in this implementation), it is check()'s job to - * verify that the device is renamable (sic) or, if not, abort. - * The check function for the device participating in the role - * of "self" is usually where rename or exchange validity is verified. - * - * All of these functions take two arguments which may be thought - * of as the collective state changes of the tree of devices - * (md_rendelta_t *family) and the rename transaction state - * (md_rentxn_t rtxn or rtxnp). - * - */ - - -/* - * rename unit lock - * (default name service routine MDRNM_LOCK) - */ -static intptr_t -md_rename_lock(md_rendelta_t *delta, md_rentxn_t *rtxnp) -{ - minor_t mnum; - md_renop_t op; - - ASSERT(delta); - ASSERT(rtxnp); - - if (!delta || !rtxnp) { - (void) mdsyserror(&rtxnp->mde, EINVAL); - return (EINVAL); - } - mnum = md_getminor(delta->dev); - op = rtxnp->op; - - /* - * target doesn't exist if renaming (by definition), - * so it need not be locked - */ - if (op == MDRNOP_RENAME && mnum == rtxnp->to.mnum) { - return (0); - } - - ASSERT(delta->uip); - if (!delta->uip) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum); - return (ENODEV); - } - - ASSERT(delta->unp); - if (!delta->unp) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum); - return (ENODEV); - } - - ASSERT(!UNIT_WRITER_HELD(delta->unp)); - - (void) md_unit_writerlock(delta->uip); - - ASSERT(UNIT_WRITER_HELD(delta->unp)); - - return (0); -} - -/* - * (default name service routine MDRNM_UNLOCK) - */ -/* ARGSUSED */ -static void -md_rename_unlock( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - ASSERT(delta); - ASSERT(delta->uip); - ASSERT(delta->unp); - - ASSERT(UNIT_WRITER_HELD(delta->unp)); - - (void) md_unit_writerexit(delta->uip); - - ASSERT(!UNIT_WRITER_HELD(delta->unp)); -} - -/* - * This is used by the various MDRNM_LIST* named services. - */ -md_rendelta_t * -md_build_rendelta( - md_renrole_t old_role, - md_renrole_t new_role, - md_dev64_t dev, - md_rendelta_t *prev, - md_unit_t *unp, - mdi_unit_t *uip, - md_error_t *ep) -{ - int err = 0; - md_rendelta_t *new; - - new = (md_rendelta_t *)kmem_alloc(sizeof (md_rendelta_t), KM_SLEEP); - - new->beginning = DELTA_BEG; - new->dev = dev; - new->new_role = new_role; - new->old_role = old_role; - new->next = NULL; - new->prev = prev; - new->unp = unp; - new->uip = uip; - bzero((void *) &new->txn_stat, sizeof (md_rendstat_t)); - - /* - * For non-meta devices that are being renamed (in the future, - * that is) we would need to pass in default functions to - * accommodate them, provided the default function is - * truly capable of performing the lock/check/unlock function - * on opaque devices. - */ - - new->lock = md_get_named_service(dev, /* modindex */ 0, - MDRNM_LOCK, md_rename_lock); - - new->unlock = (md_ren_void_svc_t *)md_get_named_service(dev, - /* modindex */ 0, MDRNM_UNLOCK, - (intptr_t (*)()) md_rename_unlock); - - new->check = md_get_named_service(dev, /* modindex */ 0, - MDRNM_CHECK, /* Default */ NULL); - - new->role_swap = NULL; /* set this when the roles are determined */ - - if (!new->lock || !new->unlock || !new->check) { - (void) mdmderror(ep, MDE_RENAME_CONFIG_ERROR, md_getminor(dev)); - err = EINVAL; - goto out; - } - - new->end = DELTA_END; - -out: - if (err != 0) { - if (new) { - new->beginning = DELTA_BEG_FREED; - new->end = DELTA_END_FREED; - - kmem_free(new, sizeof (md_rendelta_t)); - new = NULL; - } - } - - if (prev) { - prev->next = new; - } - - return (new); -} - -/* - * md_store_recid() - * used by role swap functions - */ -void -md_store_recid( - int *prec_idx, - mddb_recid_t *recid_list, - md_unit_t *un) -{ - mddb_recid_t *rp; - bool_t add_recid; - - ASSERT(prec_idx); - ASSERT(recid_list); - ASSERT(recid_list[*prec_idx] == 0); - ASSERT(*prec_idx >= 0); - - for (add_recid = TRUE, rp = recid_list; add_recid && rp && *rp; rp++) { - if (MD_RECID(un) == *rp) { - add_recid = FALSE; - } - } - - if (add_recid) { - recid_list[(*prec_idx)++] = MD_RECID(un); - } -} - -/* - * MDRNM_LIST_URFOLKS: generic named svc entry point - * add all parents onto the list pointed to by dlpp - * (only weird multi-parented devices need to have their - * own named svc to do this.) - */ -static int -md_rename_listfolks(md_rendelta_t **dlpp, md_rentxn_t *rtxnp) -{ - md_rendelta_t *new; - - ASSERT(rtxnp); - ASSERT(dlpp); - ASSERT(*dlpp == NULL); - ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); - ASSERT(rtxnp->from.uip); - ASSERT(rtxnp->from.unp); - - if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, - rtxnp->from.mnum); - return (-1); - } - - if (!MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) { - return (0); - } - - /* - * If supporting log renaming (and other multiparented devices) - * callout to each misc module to claim this waif and return the - * md_dev64_t of its parents. - */ - if (MD_PARENT(rtxnp->from.unp) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, - rtxnp->from.mnum); - return (2); - } - - if ((rtxnp->op == MDRNOP_RENAME) || - (MD_PARENT(rtxnp->from.unp) != MD_SID(rtxnp->to.unp))) { - - new = md_build_rendelta( - MDRR_PARENT, - MDRR_PARENT, - md_makedevice(md_major, MD_PARENT(rtxnp->from.unp)), - NULL, - MD_UNIT(MD_PARENT(rtxnp->from.unp)), - MDI_UNIT(MD_PARENT(rtxnp->from.unp)), - &rtxnp->mde); - } else { - /* parent is swapping roles with self */ - new = md_build_rendelta( - MDRR_PARENT, - MDRR_SELF, - md_makedevice(md_major, MD_SID(rtxnp->to.unp)), - NULL, - rtxnp->to.unp, - rtxnp->to.uip, - &rtxnp->mde); - } - - if (!new) { - if (mdisok(&rtxnp->mde)) { - (void) mdsyserror(&rtxnp->mde, ENOMEM); - } - return (-1); - } - - *dlpp = new; - - return (1); -} - -/* - * MDRNM_LIST_URSELF: named svc entry point - * add all delta entries appropriate for ourselves onto the deltalist pointed - * to by dlpp - */ -static int -md_rename_listself(md_rendelta_t **dlpp, md_rentxn_t *rtxnp) -{ - md_rendelta_t *new, *p; - bool_t exchange_up = FALSE; - - ASSERT(rtxnp); - ASSERT(dlpp); - ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); - ASSERT(rtxnp->from.unp); - ASSERT(rtxnp->from.uip); - - if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, - rtxnp->from.mnum); - return (-1); - } - - for (p = *dlpp; p && p->next != NULL; p = p->next) { - /* NULL */ - } - - /* - * renaming or - * from's parent is not to and to's parent is not from - */ - if (rtxnp->op == MDRNOP_RENAME) { - new = md_build_rendelta( - MDRR_SELF, - MDRR_SELF, - md_makedevice(md_major, rtxnp->from.mnum), - p, - rtxnp->from.unp, - rtxnp->from.uip, - &rtxnp->mde); - } else { - - if (MD_PARENT(rtxnp->from.unp) == MD_SID(rtxnp->to.unp)) { - exchange_up = TRUE; - } - - /* self and parent are flipping */ - new = md_build_rendelta( - MDRR_SELF, - exchange_up? MDRR_PARENT: MDRR_CHILD, - md_makedevice(md_major, rtxnp->from.mnum), - p, - rtxnp->from.unp, - rtxnp->from.uip, - &rtxnp->mde); - } - - if (!new) { - if (mdisok(&rtxnp->mde)) { - (void) mdsyserror(&rtxnp->mde, ENOMEM); - } - return (-1); - } - - if (!*dlpp) { - *dlpp = new; - } - - return (1); -} - -/* - * free the tree of all deltas to devices involved in the rename transaction - */ -static void -free_dtree(md_rendelta_t *family) -{ - md_rendelta_t *next = NULL; - int i = 0; - md_rendelta_t *r; - - for (r = family; (NULL != r); r = next, i++) { - - next = r->next; - - /* shift << because it makes the resultant pattern readable */ - r->beginning = DELTA_BEG_FREED ^ (i << guard_shift); - r->end = DELTA_END_FREED ^ (i << guard_shift); - - kmem_free(r, sizeof (md_rendelta_t)); - } -} - -/* - * walk down family tree, calling lock service function - */ -static int -lock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) -{ - md_rendelta_t *r; - int rc; - - ASSERT(family); - ASSERT(rtxnp); - - if (!family || !rtxnp) { - return (EINVAL); - } - - for (rc = 0, r = family; r; r = r->next) { - - ASSERT(r->unp); - ASSERT(!UNIT_WRITER_HELD(r->unp)); - ASSERT(r->lock); - - if ((rc = (int)(*r->lock) (r, rtxnp)) != 0) { - return (rc); - } - r->txn_stat.locked = TRUE; - } - - return (0); -} - -/* - * We rely on check() (MDRNM_CHECK) to make exhaustive checks, - * since we don't attempt to undo role_swap() failures. - * - * To implement an undo() function would require each role_swap() - * to store a log of previous state of the structures it changes, - * presumably anchored by the rendelta. - * - */ -static int -check_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) -{ - md_rendelta_t *r; - int rc; - - ASSERT(family); - ASSERT(rtxnp); - - if (!family || !rtxnp) { - /* no error packet to set? */ - return (EINVAL); - } - - for (r = family, rc = 0; r; r = r->next) { - - ASSERT(UNIT_WRITER_HELD(r->unp)); - ASSERT(r->txn_stat.locked); - - /* - * doesn't exist for rename - */ - if (!(rtxnp->op == MDRNOP_RENAME && - md_getminor(r->dev) == rtxnp->to.mnum)) { - ASSERT(r->uip); - r->txn_stat.is_open = md_unit_isopen(r->uip); - } - - /* - * if only allowing offline rename/exchanges, check - * for top being trans because it opens its sub-devices - */ - - switch (rtxnp->revision) { - case MD_RENAME_VERSION_OFFLINE: - if ((r->txn_stat.is_open) && - (!rtxnp->stat.trans_in_stack)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - md_getminor(r->dev)); - return (EBUSY); - } - break; - - case MD_RENAME_VERSION_ONLINE: - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - md_getminor(r->dev)); - return (EINVAL); - } - - /* MD_UN_MOD_INPROGRESS includes the MD_UN_RENAMING bit */ - - if (MD_STATUS(r->unp) & MD_UN_MOD_INPROGRESS) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - md_getminor(r->dev)); - return (EBUSY); - } - - MD_STATUS(r->unp) |= MD_UN_RENAMING; - - if ((rc = (int)(*r->check)(r, rtxnp)) != 0) { - return (rc); - } - - /* and be sure we can proceed */ - if (!(r->role_swap)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - md_getminor(r->dev)); - return (EINVAL); - } - r->txn_stat.checked = TRUE; - } - - return (0); -} - - -/* - * rename role_swap() functions are responsible for updating their - * own parent, self and children references in both on-disk - * and in-core structures, as well as storing the changed - * record ids into recids and incrementing rec_idx. - */ - -static void -role_swap_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) -{ - md_rendelta_t *r; - - ASSERT(family); - ASSERT(rtxnp); - - for (r = family; r; r = r->next) { - ASSERT(r->role_swap); - ASSERT(r->txn_stat.locked); - ASSERT(r->txn_stat.checked); - - (*r->role_swap)(r, rtxnp); - - r->txn_stat.role_swapped = TRUE; - } - - /* - * there's some work to do, but not more than expected - */ - ASSERT(rtxnp->rec_idx > 0); - ASSERT(rtxnp->rec_idx < rtxnp->n_recids); - - if (rtxnp->rec_idx >= rtxnp->n_recids || rtxnp->rec_idx <= 0) { - /* - * There's no way to indicate error from here, - * and even if we could, there's no undo mechanism. - * We've already modified the in-core structs, so - * We can't continue w/o committing, but we - * don't appear to have anything to commit. - */ - cmn_err(CE_PANIC, - "md_rename: role_swap_dtree(family:%p, rtxnp:%p)", - (void *) family, (void *) rtxnp); - return; - } - rtxnp->recids[rtxnp->rec_idx] = 0; - - mddb_commitrecs_wrapper(rtxnp->recids); -} - -/* - * walk down delta tree, calling the unlock service for each device, - * provided any of the devices appear to have been locked - */ -static void -unlock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp) -{ - md_rendelta_t *r; - uint_t any_locked = FALSE; - - ASSERT(family); - ASSERT(rtxnp); - - for (r = family; r; r = r->next) { - - ASSERT(!(r->txn_stat.unlocked)); /* "has been unlocked" */ - any_locked |= r->txn_stat.locked; - } - - if (any_locked) { - - /* unwind in reverse order */ - for (r = family; NULL != r->next; r = r->next) { - /* NULL */ - } - - for (; NULL != r; r = r->prev) { - MD_STATUS(r->unp) &= ~MD_UN_RENAMING; - ASSERT(r->unlock); - r->unlock(r, rtxnp); - r->txn_stat.unlocked = TRUE; - } - } -} - -/* - * MDRNM_UPDATE_SELF - * This role swap function is identical for all unit types, - * so keep it here. It's also the best example because it - * touches all the modified portions of the relevant - * in-common structures. - */ -static void -md_rename_update_self( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min; - sv_dev_t sv; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - - ASSERT(rtxnp); - ASSERT(rtxnp->op == MDRNOP_RENAME); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta->old_role == MDRR_SELF); - ASSERT(delta->new_role == MDRR_SELF); - ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - /* - * self id changes in our own unit struct - */ - MD_SID(delta->unp) = to_min; - - /* - * make sure that dest always has correct un_revision - * and rb_revision - */ - delta->unp->c.un_revision |= MD_FN_META_DEV; - dep = mddb_getrecdep(MD_RECID(delta->unp)); - ASSERT(dep); - rbp = dep->de_rb; - if (rbp->rb_revision & MDDB_REV_RB) { - rbp->rb_revision = MDDB_REV_RBFN; - } else if (rbp->rb_revision & MDDB_REV_RB64) { - rbp->rb_revision = MDDB_REV_RB64FN; - } - - /* - * clear old array pointers to unit in-core and unit - */ - - MDI_VOIDUNIT(from_min) = NULL; - MD_VOIDUNIT(from_min) = NULL; - - /* - * and point the new slots at the unit in-core and unit structs - */ - - MDI_VOIDUNIT(to_min) = delta->uip; - MD_VOIDUNIT(to_min) = delta->unp; - - /* - * recreate kstats - * - destroy the ones associated with our former identity - * - reallocate and associate them with our new identity - */ - md_kstat_destroy_ui(delta->uip); - md_kstat_init_ui(to_min, delta->uip); - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = to_min; - - /* - * name space addition of new key was done from user-level - * remove the old name's key here - */ - - sv.setno = MD_MIN2SET(from_min); - sv.key = rtxnp->from.key; - - md_rem_names(&sv, 1); - - /* - * Remove associated device node as well - */ - md_remove_minor_node(from_min); - - /* - * and store the record id (from the unit struct) into recids - * for later commitment by md_rename() - */ - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * Either one of our siblings and/or our parent changed identities. - */ -static void -md_renexch_update_parent( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - ASSERT(rtxnp); - ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE)); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->old_role == MDRR_CHILD); - ASSERT(delta->new_role == MDRR_CHILD); - ASSERT((MD_PARENT(delta->unp) == rtxnp->from.mnum) || - (MD_PARENT(delta->unp) == rtxnp->to.mnum)); - - if (MD_PARENT(delta->unp) == rtxnp->from.mnum) { - MD_PARENT(delta->unp) = rtxnp->to.mnum; - } - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * exchange up (child->self) - */ -static void -md_exchange_child_update_to( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min; - - ASSERT(rtxnp); - ASSERT(rtxnp->op == MDRNOP_EXCHANGE); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(delta->old_role == MDRR_CHILD); - ASSERT(delta->new_role == MDRR_SELF); - ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - /* - * self id changes in our own unit struct - * Note: - * - Since we're assuming the identity of "from" we use its mnum even - * though we're updating the "to" structures. - */ - - MD_SID(delta->unp) = from_min; - - /* - * our parent identifier becomes the new self, who was "to" - */ - - MD_PARENT(delta->unp) = to_min; - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: - * - The other half of this transfer is done in the "update from" - * rename/exchange named service. - */ - - MD_VOIDUNIT(from_min) = delta->unp; - MDI_VOIDUNIT(from_min) = delta->uip; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->from.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = from_min; - - /* - * name space additions, if necessary, were done from user-level. - * name space deletions, if necessary, were done in "exchange_from" - */ - - /* - * and store the record id (from the unit struct) into recids - * for later comitment by md_rename() - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * exchange up (self->parent) - */ -static void -md_exchange_self_update_from_up( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min; - - ASSERT(rtxnp); - ASSERT(rtxnp->op == MDRNOP_EXCHANGE); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(delta->old_role == MDRR_SELF); - ASSERT(delta->new_role == MDRR_PARENT); - ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - /* - * self id changes in our own unit struct - * Note: - * - Since we're assuming the identity of "to" we use its mnum - * while we're updating the "to" structures. - */ - - MD_SID(delta->unp) = to_min; - - /* - * our parent identifier becomes the new parent, who was "from" - */ - - MD_PARENT(delta->unp) = from_min; - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: - * - The other half of this transfer is done in the "update from" - * rename/exchange named service. - */ - - MD_VOIDUNIT(to_min) = delta->unp; - MDI_VOIDUNIT(to_min) = delta->uip; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->to.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = to_min; - - /* - * name space additions, if necessary, were done from user-level. - * name space deletions, if necessary, were done in "exchange_from" - */ - - /* - * and store the record id (from the unit struct) into recids - * for later comitment by md_rename() - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * The order of the called role swap functions is critical. - * If they're not ordered as "all parents", then "all self" - * then "all child" transitions, we will almost certainly - * corrupt the data base and the in-core linkages. So, - * verify that the list built by the individual drivers is - * ok here. - * - * We could have done fancy bit encodings of the roles so - * it all fit into a single word and we wouldn't need the - * prev_ord field. But, since cpu power is cheaper than - * than people power, they're all separate for easier - * debugging and maintaining. (In the unlikely event that - * rename/exchange ever becomes cpu-limited, and this - * algorithm is the bottleneck, we should revisit this.) - */ - -static bool_t -role_swap_is_valid( - int previous, - int current, - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - bool_t valid = FALSE; - - /* - * we've backed up in processing the role table - */ - if ((previous > current) && - (delta->prev && (delta->old_role != delta->prev->old_role))) { - goto out; - } - - /* - * we're repeating the same role transition - */ - if (previous == current) { - switch (delta->old_role) { - case MDRR_PARENT: - /* - * require at least one of the devices to - * be multiparented for us to allow another - * parent transition - */ - if ((MD_MULTI_PARENT != MD_PARENT(rtxnp->from.unp)) && - (MD_MULTI_PARENT != MD_PARENT(rtxnp->to.unp))) { - goto out; - } - break; - - case MDRR_CHILD: - /* it's ok to have multiple children */ - break; - - case MDRR_SELF: - /* it's never ok to have multiple self transitions */ - /* FALLTHROUGH */ - default: - goto out; - } - } - - valid = TRUE; -out: - if (!valid) { - if (md_rename_debug != 0) { - cmn_err(CE_NOTE, "previous: %d, current: %d, role: %s", - previous, current, - ROLE(delta->old_role)); - delay(3*drv_usectohz(1000000)); - ASSERT(FALSE); - } - } - - return (valid); -} - -static role_change_tab_t * -lookup_role(md_renrole_t old_role, md_renrole_t new_role) -{ - role_change_tab_t *rp; - role_change_tab_t *found = NULL; - - for (rp = role_swap_tab; !found && (rp->old_role != MDRR_UNK); rp++) { - - if (rp->old_role == old_role && rp->new_role == new_role) { - found = rp; - } - } - /* - * we require a named svc if we've got two devices - * claiming to be changing roles in this manner - */ - ASSERT(found); - ASSERT(found->default_svc != ILLEGAL_ROLESWAP_SVC); - ASSERT(found->svc_name != ILLEGAL_SVC_NAME); - - if (!found || - (found->default_svc == ILLEGAL_ROLESWAP_SVC) || - (found->svc_name == ILLEGAL_SVC_NAME)) { - return (NULL); - } - - return (found); -} - -/* - * fill in the role swap named svc., now that we know each device - * and its changing role - */ -static int -valid_roleswap_dtree( - md_rendelta_t *family, - md_rentxn_t *rtxnp -) -{ - md_rendelta_t *r; - role_change_tab_t *rolep; - minor_t from_min, to_min; - int prev_ord = -1; - bool_t found_self = FALSE; - int err = 0; - - ASSERT(family); - ASSERT(rtxnp); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - for (r = family; r; r = r->next, prev_ord = rolep->ord) { - - if (!(rolep = lookup_role(r->old_role, r->new_role))) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, from_min); - err = EOPNOTSUPP; - goto out; - } - r->role_swap = (md_ren_roleswap_svc_t *)md_get_named_service( - r->dev, /* modindex */ 0, - (char *)rolep->svc_name, - (intptr_t (*)()) rolep->default_svc); - - /* - * someone probably called the ioctl directly and - * incorrectly, rather than via the libmeta wrappers - */ - if (!(r->role_swap)) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_TARGET_UNRELATED, to_min); - err = EOPNOTSUPP; - goto out; - } - - if (!role_swap_is_valid(prev_ord, rolep->ord, r, rtxnp)) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, from_min); - err = EINVAL; - goto out; - } - - if (rolep->old_role == MDRR_SELF) { - found_self = TRUE; - } - - if (MD_PARENT(r->unp) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, - md_getminor(r->dev)); - err = EINVAL; - goto out; - } - } - - /* - * must be at least one selfish device - */ - ASSERT(found_self); - if (!found_self) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, from_min); - err = EINVAL; - goto out; - } - -out: - return (err); -} - -/* - * dump contents of rename transaction - */ -static void -dump_txn(md_rentxn_t *rtxnp) { - - if (md_rename_debug == 0) { - return; - } - - cmn_err(CE_NOTE, "rtxnp: %p", (void *) rtxnp); - if (rtxnp) { - cmn_err(CE_NOTE, "beginning: %llx, op: %s", - rtxnp->beginning, OP_STR(rtxnp->op)); - - cmn_err(CE_NOTE, - "revision: %d, uflags: %d, rec_idx: %d, n_recids: %d, rec_ids: %p%s", - rtxnp->revision, rtxnp->uflags, - rtxnp->rec_idx, rtxnp->n_recids, (void *) rtxnp->recids, - rtxnp->stat.trans_in_stack? " (trans in stack)": ""); - cmn_err(CE_NOTE, " from: beginning: %llx", - rtxnp->from.beginning); - cmn_err(CE_NOTE, " minor: %lX, key: %lX", - (ulong_t)rtxnp->from.mnum, (ulong_t)rtxnp->from.key); - cmn_err(CE_NOTE, " unp: %lX, uip: %lX", - (ulong_t)rtxnp->from.unp, (ulong_t)rtxnp->from.uip); - cmn_err(CE_NOTE, " end: %llx", rtxnp->from.end); - cmn_err(CE_NOTE, " to: beginning: %llx", rtxnp->to.beginning); - cmn_err(CE_NOTE, " minor: %lX, key: %lX", - (ulong_t)rtxnp->to.mnum, (ulong_t)rtxnp->to.key); - cmn_err(CE_NOTE, " unp: %lX, uip: %lX", - (ulong_t)rtxnp->to.unp, (ulong_t)rtxnp->to.uip); - cmn_err(CE_NOTE, " end: %llx", rtxnp->to.end); - cmn_err(CE_NOTE, "end: %llx\n", rtxnp->end); - } - delay(drv_usectohz(1000000)); -} - -/* - * dump contents of all deltas - */ -static void -dump_dtree(md_rendelta_t *family) -{ - md_rendelta_t *r; - int i; - - if (md_rename_debug == 0) { - return; - } - - for (r = family, i = 0; r; r = r->next, i++) { - cmn_err(CE_NOTE, "%d. beginning: %llx", i, r->beginning); - cmn_err(CE_NOTE, " r: %lX, dev: %lX, next: %lx, prev: %lx", - (ulong_t)r, (ulong_t)r->dev, - (ulong_t)r->next, (ulong_t)r->prev); - - cmn_err(CE_NOTE, " role: %s -> %s, unp: %lx, uip: %lx", - ROLE(r->old_role), ROLE(r->new_role), - (ulong_t)r->unp, (ulong_t)r->uip); - cmn_err(CE_NOTE, - " lock: %lx, unlock: %lx\n\t check: %lx, role_swap: %lx", - (ulong_t)r->lock, (ulong_t)r->unlock, - (ulong_t)r->check, (ulong_t)r->role_swap); - if (*((uint_t *)(&r->txn_stat)) != 0) { - cmn_err(CE_NOTE, "status: (0x%x) %s%s%s%s%s", - *((uint_t *)(&r->txn_stat)), - r->txn_stat.is_open? "is_open " : "", - r->txn_stat.locked? "locked " : "", - r->txn_stat.checked? "checked " : "", - r->txn_stat.role_swapped? "role_swapped " : "", - r->txn_stat.unlocked? "unlocked" : ""); - } - cmn_err(CE_NOTE, "end: %llx\n", r->end); - } - delay(drv_usectohz(1000000)); -} - -/* - * validate the rename request parameters - */ -static int -validate_txn_parms(md_rentxn_t *rtxnp) -{ - minor_t to_min, from_min; - - ASSERT(rtxnp); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - switch (rtxnp->revision) { - case MD_RENAME_VERSION_OFFLINE: - if (rtxnp->uflags != 0) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (ENOTSUP); - } - break; - - case MD_RENAME_VERSION_ONLINE: - /* not supported until 5.0 */ - /* FALLTHROUGH */ - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EPROTONOSUPPORT); - } - - if ((rtxnp->from.uip = MDI_UNIT(from_min)) == NULL) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); - return (ENODEV); - } - - if (!md_dev_exists(md_makedevice(md_major, from_min))) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); - return (ENODEV); - } - - if ((rtxnp->from.key == MD_KEYBAD) || (rtxnp->from.key == MD_KEYWILD)) { - (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, from_min); - return (EINVAL); - } - - rtxnp->from.kstatp = rtxnp->from.uip->ui_kstat; - rtxnp->from.unp = MD_UNIT(from_min); - - if (MD_MIN2SET(to_min) != MD_MIN2SET(from_min)) { - (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); - return (EINVAL); - } - - switch (rtxnp->op) { - case MDRNOP_EXCHANGE: - rtxnp->to.unp = MD_UNIT(to_min); - rtxnp->to.uip = MDI_UNIT(to_min); - - /* - * exchange requires target to exist - */ - - if ((rtxnp->to.uip == NULL) || - (md_dev_exists(md_makedevice(md_major, to_min)) == NULL)) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, - to_min); - return (ENODEV); - } - - if ((rtxnp->to.key == MD_KEYBAD) || - (rtxnp->to.key == MD_KEYWILD)) { - (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); - return (EINVAL); - } - - /* - * is not in the role of , - * that is, - * has a parent, which is and has a parent too - * or - * has a parent, which is and can have a child - */ - if ((MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) && - (MD_PARENT(rtxnp->from.unp) == to_min) && - MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER, - from_min); - return (EINVAL); - } - - if ((MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) && - (MD_PARENT(rtxnp->to.unp) == from_min) && - (MD_CAPAB(rtxnp->to.unp) & MD_CAN_META_CHILD)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER, - from_min); - return (EINVAL); - } - - rtxnp->to.kstatp = rtxnp->to.uip->ui_kstat; - break; - - case MDRNOP_RENAME: - - /* - * rename requires not to exist - */ - - if (MDI_UNIT(to_min) || - md_dev_exists(md_makedevice(md_major, to_min))) { - - (void) mdmderror(&rtxnp->mde, MDE_UNIT_ALREADY_SETUP, - to_min); - return (EEXIST); - } - - /* - * and to be within valid ranges for the current - * limits on number of sets and metadevices - */ - if ((MD_MIN2SET(to_min) >= md_nsets) || - (MD_MIN2UNIT(to_min) >= md_nunits)) { - (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min); - return (EINVAL); - } - - rtxnp->to.unp = NULL; - rtxnp->to.uip = NULL; - rtxnp->to.kstatp = NULL; - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - /* - * install guard rails - */ - rtxnp->beginning = TXN_BEG; - - rtxnp->from.beginning = TXNUN_BEG; - rtxnp->from.end = TXNUN_END; - - rtxnp->to.beginning = TXNUN_BEG; - rtxnp->to.end = TXNUN_END; - - rtxnp->end = TXN_END; - - return (0); -} - -/* - * If the device being changed exhibits this capability, set the list - * relatives function pointer to the named service that lists the - * appropriate relatives for this capability. - */ -static int -set_list_rels_funcp( - md_rentxn_t *rtxnp, - md_stackcap_t capability, - char *svc_name, - md_ren_list_svc_t default_svc_func, - md_ren_list_svc_t **list_relatives_funcp -) -{ - int err; - minor_t from_min; - md_dev64_t from_dev; - md_unit_t *from_un; - mdi_unit_t *from_ui; - - ASSERT(rtxnp); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - ASSERT(list_relatives_funcp); - - from_min = rtxnp->from.mnum; - from_dev = md_makedevice(md_major, from_min); - from_un = MD_UNIT(from_min); - from_ui = MDI_UNIT(from_min); - err = 0; - - if (!from_ui || !from_un) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); - err = EINVAL; - goto out; - } - - if ((capability == MD_CAN_DO_ANYTHING) || - ((MD_CAPAB(from_un) & capability) == capability)) { - - *list_relatives_funcp = (md_ren_list_svc_t *) - md_get_named_service(from_dev, - /* modindex */ 0, svc_name, - (intptr_t (*)()) default_svc_func); - - ASSERT(*list_relatives_funcp); - if (!(*list_relatives_funcp)) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, from_min); - err = EINVAL; - goto out; - } - } else { - *list_relatives_funcp = (md_ren_list_svc_t *)NULL; - } - -out: - return (err); -} - -/* - * call list relations function, bump recid counter - * by number of members added to the delta list. - * Validate that the number of members added is within bounds. - */ -static int -list_relations( - md_rendelta_t **family, - md_rentxn_t *rtxnp, - md_ren_list_svc_t *add_relatives_funcp, - int valid_min, - int valid_max -) -{ - int n_added; - int err = 0; - - ASSERT(family); - ASSERT(rtxnp); - - if (!family || !rtxnp) { - err = EINVAL; - goto out; - } - - n_added = 0; - - /* no relations of this type */ - if (!add_relatives_funcp) { - goto out; - } - - n_added = (*add_relatives_funcp) (family, rtxnp); - - if ((n_added < valid_min) || (n_added > valid_max)) { - if (mdisok(&rtxnp->mde)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - rtxnp->from.mnum); - } - err = EINVAL; - goto out; - } - - rtxnp->n_recids += n_added; - -out: - return (err); -} - -/* - * build recid array - */ -static int -alloc_recids(md_rendelta_t *family, md_rentxn_t *rtxnp) -{ - int err = 0; - - if (!family || !rtxnp) { - err = ENOMEM; - goto out; - } - - rtxnp->rec_idx = 0; - - if (rtxnp->n_recids == 0) { - err = EINVAL; - goto out; - } - - rtxnp->n_recids += 1; /* terminator */ - - rtxnp->recids = kmem_alloc(sizeof (mddb_recid_t) * rtxnp->n_recids, - KM_SLEEP); - if (!(rtxnp->recids)) { - err = ENOMEM; - goto out; - } - - bzero((void *) rtxnp->recids, - (sizeof (mddb_recid_t) * rtxnp->n_recids)); -out: - if (err != 0) { - (void) mdsyserror(&rtxnp->mde, err); - } - - return (err); -} - -/* - * build family tree (parent(s), self, children) - * The order of the resultant list is important, as it governs - * the order of locking, checking and changing the unit structures. - * Since we'll be changing them, we may not use the MD_UNIT, MDI_UNIT, - * and other pointer which depend on the array being correct. - * Use only the cached pointers (in rtxnp.) - */ -static md_rendelta_t * -build_dtree(md_rentxn_t *rtxnp) -{ - md_ren_list_svc_t *add_folks, *add_self, *add_kids; - int err; - md_rendelta_t *family = NULL; - - ASSERT(rtxnp); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - - err = set_list_rels_funcp(rtxnp, MD_CAN_PARENT, MDRNM_LIST_URFOLKS, - md_rename_listfolks, &add_folks); - - if (err) { - goto out; - } - - err = set_list_rels_funcp(rtxnp, MD_CAN_DO_ANYTHING, MDRNM_LIST_URSELF, - md_rename_listself, &add_self); - if (err) { - goto out; - } - - err = set_list_rels_funcp(rtxnp, MD_CAN_META_CHILD, MDRNM_LIST_URKIDS, - /* no default list func */ ((int (*)()) NULL), - &add_kids); - if (err) { - goto out; - } - - rtxnp->n_recids = 0; /* accumulated by list_relations() */ - - if ((err = list_relations(&family, rtxnp, add_folks, 0, 1)) != 0) { - goto out; - } - - if ((err = list_relations(&family, rtxnp, add_self, 1, 1)) != 0) { - goto out; - } - - err = list_relations(&family, rtxnp, add_kids, 0, md_nunits); - if (err != 0) { - goto out; - } - - /* - * delta tree is still empty? - */ - if ((!family) || (rtxnp->n_recids == 0)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - rtxnp->from.mnum); - err = EINVAL; - goto out; - } - - /* - * verify role change interactions - */ - if ((err = valid_roleswap_dtree(family, rtxnp)) != 0) { - goto out; - } - - if ((err = alloc_recids(family, rtxnp)) != 0) { - goto out; - } - -out: - if (err != 0) { - free_dtree(family); - dump_dtree(family); /* yes, after freeing it */ - family = NULL; - } - - return (family); -} - - -/* - * (MD_IOCRENAME) rename/exchange ioctl entry point - * calls individual driver named service entry points - * to build a list of devices which need state changed, - * to verify that they're in a state where renames may occur, - * and to modify themselves into their new identities - */ - -int -md_rename( - md_rename_t *mrp, - IOLOCK *iolockp) -{ - md_rendelta_t *family = NULL; - md_rentxn_t rtxn; - int err = 0; - set_t setno; - mdc_unit_t *mdc; - - ASSERT(iolockp); - if (mrp == NULL) - return (EINVAL); - - setno = MD_MIN2SET(mrp->from.mnum); - if (setno >= md_nsets) { - return (EINVAL); - } - - /* - * Early exit if top is eof trans - */ - mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT(mrp->from.mnum)]; - while (mdc != NULL) { - if (!MD_HAS_PARENT(mdc->un_parent)) { - break; - } else { - mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT - (mdc->un_parent)]; - } - } - - if (mdc && mdc->un_type == MD_METATRANS) { - return (EINVAL); - } - - - mdclrerror(&mrp->mde); - - bzero((void *) &rtxn, sizeof (md_rentxn_t)); - mdclrerror(&rtxn.mde); - - /* - * encapsulate user parameters - */ - rtxn.from.key = mrp->from.key; - rtxn.to.key = mrp->to.key; - rtxn.from.mnum = mrp->from.mnum; - rtxn.to.mnum = mrp->to.mnum; - rtxn.op = mrp->op; - rtxn.uflags = mrp->flags; - rtxn.revision = mrp->revision; - - if (MD_MIN2UNIT(mrp->to.mnum) >= md_nunits) { - err = EINVAL; - goto cleanup; - } - - /* - * catch this early, before taking any locks - */ - if (md_get_setstatus(setno) & MD_SET_STALE) { - (void) (mdmddberror(&rtxn.mde, MDE_DB_STALE, rtxn.from.mnum, - MD_MIN2SET(rtxn.from.mnum))); - err = EROFS; - goto cleanup; - } - - /* - * Locking and re-validation (of the per-unit state) is - * done by the rename lock/unlock service, for now only take - * the array lock. - */ - md_array_writer(iolockp); - - /* - * validate the rename/exchange parameters - * rtxn is filled in on succesful completion of validate_txn_parms() - */ - if ((err = validate_txn_parms(&rtxn)) != 0) { - goto cleanup; - } - - /* - * build list of work to do, the "delta tree" for related devices - */ - if (!(family = build_dtree(&rtxn))) { - err = ENOMEM; - goto cleanup; - } - dump_txn(&rtxn); - dump_dtree(family); - - if ((err = lock_dtree(family, &rtxn)) != 0) { - goto cleanup; - } - - if ((err = check_dtree(family, &rtxn)) != 0) { - goto cleanup; - } - dump_txn(&rtxn); - - role_swap_dtree(family, &rtxn); /* commits the recids */ - - /* - * let folks know - */ - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_SRC, SVM_TAG_METADEVICE, - MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_DST, SVM_TAG_METADEVICE, - MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum); - -cleanup: - - if (err != 0 && mdisok(&rtxn.mde)) { - (void) mdsyserror(&rtxn.mde, EINVAL); - } - - if (family) { - unlock_dtree(family, &rtxn); - free_dtree(family); - dump_dtree(family); - family = NULL; - } - - if (rtxn.recids && (rtxn.n_recids > 0)) { - kmem_free(rtxn.recids, sizeof (mddb_recid_t) * rtxn.n_recids); - } - - if (!mdisok(&rtxn.mde)) { - (void) mdstealerror(&mrp->mde, &rtxn.mde); - } - - return (0); /* success/failure will be communicated via rtxn.mde */ -} - -static role_change_tab_t -role_swap_tab[] = -{ - { - 1, /* ordinal */ - MDRR_PARENT, /* old role */ - MDRR_PARENT, /* new role */ - MDRNM_UPDATE_KIDS, /* named service */ - NO_DEFAULT_ROLESWAP_SVC /* default role swap function */ - }, - { - 2, - MDRR_PARENT, - MDRR_SELF, - MDRNM_PARENT_UPDATE_TO, - NO_DEFAULT_ROLESWAP_SVC - }, - { - 3, - MDRR_PARENT, - MDRR_CHILD, - ILLEGAL_SVC_NAME, - ILLEGAL_ROLESWAP_SVC - }, - { - 4, - MDRR_SELF, - MDRR_PARENT, - MDRNM_SELF_UPDATE_FROM_UP, - md_exchange_self_update_from_up - }, - { - 5, - MDRR_SELF, - MDRR_SELF, - MDRNM_UPDATE_SELF, - md_rename_update_self - }, - { - 6, - MDRR_SELF, - MDRR_CHILD, - MDRNM_SELF_UPDATE_FROM_DOWN, - NO_DEFAULT_ROLESWAP_SVC - }, - { - 7, - MDRR_CHILD, - MDRR_PARENT, - ILLEGAL_SVC_NAME, - ILLEGAL_ROLESWAP_SVC - }, - { - 8, - MDRR_CHILD, - MDRR_SELF, - MDRNM_CHILD_UPDATE_TO, - md_exchange_child_update_to - }, - { - 9, - MDRR_CHILD, - MDRR_CHILD, - MDRNM_UPDATE_FOLKS, - md_renexch_update_parent - }, - - /* terminator is old_role == MDRR_UNK */ - { - 0, - MDRR_UNK, - MDRR_UNK, - ILLEGAL_SVC_NAME, - NO_DEFAULT_ROLESWAP_SVC - } -}; diff --git a/usr/src/uts/common/io/lvm/md/md_subr.c b/usr/src/uts/common/io/lvm/md/md_subr.c deleted file mode 100644 index 921c552d184d..000000000000 --- a/usr/src/uts/common/io/lvm/md/md_subr.c +++ /dev/null @@ -1,4345 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Driver for Virtual Disk. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include - - -/* - * Machine specific Hertz is kept here - */ -extern clock_t md_hz; - -/* - * Externs. - */ -extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*); -extern major_t md_major; -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; -extern md_set_io_t md_set_io[]; -extern md_ops_t **md_ops; -extern md_ops_t *md_opslist; -extern ddi_modhandle_t *md_mods; -extern dev_info_t *md_devinfo; - -extern md_krwlock_t md_unit_array_rw; -extern kmutex_t md_mx; -extern kcondvar_t md_cv; - -extern md_krwlock_t hsp_rwlp; -extern md_krwlock_t ni_rwlp; - -extern int md_num_daemons; -extern int md_status; -extern int md_ioctl_cnt; -extern int md_mtioctl_cnt; - -extern struct metatransops metatransops; -extern md_event_queue_t *md_event_queue; -extern md_resync_t md_cpr_resync; -extern int md_done_daemon_threads; -extern int md_ff_daemon_threads; - - -extern mddb_set_t *mddb_setenter(set_t setno, int flag, int *errorcodep); -extern void mddb_setexit(mddb_set_t *s); -extern void *lookup_entry(struct nm_next_hdr *, set_t, - side_t, mdkey_t, md_dev64_t, int); -extern struct nm_next_hdr *get_first_record(set_t, int, int); -extern dev_t getrootdev(void); - -struct mdq_anchor md_done_daemon; /* done request queue */ -struct mdq_anchor md_mstr_daemon; /* mirror error, WOW requests */ -struct mdq_anchor md_mhs_daemon; /* mirror hotspare requests queue */ -struct mdq_anchor md_hs_daemon; /* raid hotspare requests queue */ -struct mdq_anchor md_ff_daemonq; /* failfast request queue */ -struct mdq_anchor md_mirror_daemon; /* mirror owner queue */ -struct mdq_anchor md_mirror_io_daemon; /* mirror owner i/o queue */ -struct mdq_anchor md_mirror_rs_daemon; /* mirror resync done queue */ -struct mdq_anchor md_sp_daemon; /* soft-part error daemon queue */ -struct mdq_anchor md_mto_daemon; /* mirror timeout daemon queue */ - -int md_done_daemon_threads = 1; /* threads for md_done_daemon requestq */ -int md_mstr_daemon_threads = 1; /* threads for md_mstr_daemon requestq */ -int md_mhs_daemon_threads = 1; /* threads for md_mhs_daemon requestq */ -int md_hs_daemon_threads = 1; /* threads for md_hs_daemon requestq */ -int md_ff_daemon_threads = 3; /* threads for md_ff_daemon requestq */ -int md_mirror_daemon_threads = 1; /* threads for md_mirror_daemon requestq */ -int md_sp_daemon_threads = 1; /* threads for md_sp_daemon requestq */ -int md_mto_daemon_threads = 1; /* threads for md_mto_daemon requestq */ - -#ifdef DEBUG -/* Flag to switch on debug messages */ -int md_release_reacquire_debug = 0; /* debug flag */ -#endif - -/* - * - * The md_request_queues is table of pointers to request queues and the number - * of threads associated with the request queues. - * When the number of threads is set to 1, then the order of execution is - * sequential. - * The number of threads for all the queues have been defined as global - * variables to enable kernel tuning. - * - */ - -#define MD_DAEMON_QUEUES 11 - -md_requestq_entry_t md_daemon_queues[MD_DAEMON_QUEUES] = { - {&md_done_daemon, &md_done_daemon_threads}, - {&md_mstr_daemon, &md_mstr_daemon_threads}, - {&md_hs_daemon, &md_hs_daemon_threads}, - {&md_ff_daemonq, &md_ff_daemon_threads}, - {&md_mirror_daemon, &md_mirror_daemon_threads}, - {&md_mirror_io_daemon, &md_mirror_daemon_threads}, - {&md_mirror_rs_daemon, &md_mirror_daemon_threads}, - {&md_sp_daemon, &md_sp_daemon_threads}, - {&md_mhs_daemon, &md_mhs_daemon_threads}, - {&md_mto_daemon, &md_mto_daemon_threads}, - {0, 0} -}; - -/* - * Number of times a message is retried before issuing a warning to the operator - */ -#define MD_MN_WARN_INTVL 10 - -/* - * Setting retry cnt to one (pre decremented) so that we actually do no - * retries when committing/deleting a mddb rec. The underlying disk driver - * does several retries to check if the disk is really dead or not so there - * is no reason for us to retry on top of the drivers retries. - */ - -uint_t md_retry_cnt = 1; /* global so it can be patched */ - -/* - * How many times to try to do the door_ki_upcall() in mdmn_ksend_message. - * Again, made patchable here should it prove useful. - */ -uint_t md_send_retry_limit = 30; - -/* - * Bug # 1212146 - * Before this change the user had to pass in a short aligned buffer because of - * problems in some underlying device drivers. This problem seems to have been - * corrected in the underlying drivers so we will default to not requiring any - * alignment. If the user needs to check for a specific alignment, - * md_uio_alignment_mask may be set in /etc/system to accomplish this. To get - * the behavior before this fix, the md_uio_alignment_mask would be set to 1, - * to check for word alignment, it can be set to 3, for double word alignment, - * it can be set to 7, etc. - * - * [Other part of fix is in function md_chk_uio()] - */ -static int md_uio_alignment_mask = 0; - -/* - * for md_dev64_t translation - */ -struct md_xlate_table *md_tuple_table; -struct md_xlate_major_table *md_major_tuple_table; -int md_tuple_length; -uint_t md_majortab_len; - -/* Function declarations */ - -static int md_create_probe_rqlist(md_probedev_impl_t *plist, - daemon_queue_t **hdr, intptr_t (*probe_test)()); - -/* - * manipulate global status - */ -void -md_set_status(int bits) -{ - mutex_enter(&md_mx); - md_status |= bits; - mutex_exit(&md_mx); -} - -void -md_clr_status(int bits) -{ - mutex_enter(&md_mx); - md_status &= ~bits; - mutex_exit(&md_mx); -} - -int -md_get_status() -{ - int result; - mutex_enter(&md_mx); - result = md_status; - mutex_exit(&md_mx); - return (result); -} - -void -md_set_setstatus(set_t setno, int bits) -{ - ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS); - - mutex_enter(&md_mx); - md_set[setno].s_status |= bits; - mutex_exit(&md_mx); -} - -void -md_clr_setstatus(set_t setno, int bits) -{ - ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS); - - mutex_enter(&md_mx); - md_set[setno].s_status &= ~bits; - mutex_exit(&md_mx); -} - -uint_t -md_get_setstatus(set_t setno) -{ - uint_t result; - - ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS); - - mutex_enter(&md_mx); - result = md_set[setno].s_status; - mutex_exit(&md_mx); - return (result); -} - -/* - * md_unit_readerlock_common: - * ------------------------- - * Mark the given unit as having a reader reference. Spin waiting for any - * writer references to be released. - * - * Input: - * ui unit reference - * lock_held 0 => ui_mx needs to be grabbed - * 1 => ui_mx already held - * Output: - * mm_unit_t corresponding to unit structure - * ui->ui_readercnt incremented - */ -static void * -md_unit_readerlock_common(mdi_unit_t *ui, int lock_held) -{ - uint_t flag = MD_UL_WRITER | MD_UL_WANABEWRITER; - - if (!lock_held) - mutex_enter(&ui->ui_mx); - while (ui->ui_lock & flag) { - if (panicstr) { - if (ui->ui_lock & MD_UL_WRITER) - panic("md: writer lock is held"); - break; - } - cv_wait(&ui->ui_cv, &ui->ui_mx); - } - ui->ui_readercnt++; - if (!lock_held) - mutex_exit(&ui->ui_mx); - return (MD_UNIT(ui->ui_link.ln_id)); -} - -void * -md_unit_readerlock(mdi_unit_t *ui) -{ - return (md_unit_readerlock_common(ui, 0)); -} - -/* - * md_unit_writerlock_common: - * ------------------------- - * Acquire a unique writer reference. Causes previous readers to drain. - * Spins if a writer reference already exists or if a previous reader/writer - * dropped the lock to allow a ksend_message to be despatched. - * - * Input: - * ui unit reference - * lock_held 0 => grab ui_mx - * 1 => ui_mx already held on entry - * Output: - * mm_unit_t reference - */ -static void * -md_unit_writerlock_common(mdi_unit_t *ui, int lock_held) -{ - uint_t flag = MD_UL_WRITER; - - if (panicstr) - panic("md: writer lock not allowed"); - - if (!lock_held) - mutex_enter(&ui->ui_mx); - - while ((ui->ui_lock & flag) || (ui->ui_readercnt != 0)) { - ui->ui_wanabecnt++; - ui->ui_lock |= MD_UL_WANABEWRITER; - cv_wait(&ui->ui_cv, &ui->ui_mx); - if (--ui->ui_wanabecnt == 0) - ui->ui_lock &= ~MD_UL_WANABEWRITER; - } - ui->ui_lock |= MD_UL_WRITER; - ui->ui_owner = curthread; - - if (!lock_held) - mutex_exit(&ui->ui_mx); - return (MD_UNIT(ui->ui_link.ln_id)); -} - -void * -md_unit_writerlock(mdi_unit_t *ui) -{ - return (md_unit_writerlock_common(ui, 0)); -} - -/* - * md_unit_readerexit_common: - * ------------------------- - * Release the readerlock for the specified unit. If the reader count reaches - * zero and there are waiting writers (MD_UL_WANABEWRITER set) wake them up. - * - * Input: - * ui unit reference - * lock_held 0 => ui_mx needs to be acquired - * 1 => ui_mx already held - */ -static void -md_unit_readerexit_common(mdi_unit_t *ui, int lock_held) -{ - if (!lock_held) - mutex_enter(&ui->ui_mx); - ASSERT((ui->ui_lock & MD_UL_WRITER) == 0); - ASSERT(ui->ui_readercnt != 0); - ui->ui_readercnt--; - if ((ui->ui_wanabecnt != 0) && (ui->ui_readercnt == 0)) - cv_broadcast(&ui->ui_cv); - - if (!lock_held) - mutex_exit(&ui->ui_mx); -} - -void -md_unit_readerexit(mdi_unit_t *ui) -{ - md_unit_readerexit_common(ui, 0); -} - -/* - * md_unit_writerexit_common: - * ------------------------- - * Release the writerlock currently held on the unit. Wake any threads waiting - * on becoming reader or writer (MD_UL_WANABEWRITER set). - * - * Input: - * ui unit reference - * lock_held 0 => ui_mx to be acquired - * 1 => ui_mx already held - */ -static void -md_unit_writerexit_common(mdi_unit_t *ui, int lock_held) -{ - if (!lock_held) - mutex_enter(&ui->ui_mx); - ASSERT((ui->ui_lock & MD_UL_WRITER) != 0); - ASSERT(ui->ui_readercnt == 0); - ui->ui_lock &= ~MD_UL_WRITER; - ui->ui_owner = NULL; - - cv_broadcast(&ui->ui_cv); - if (!lock_held) - mutex_exit(&ui->ui_mx); -} - -void -md_unit_writerexit(mdi_unit_t *ui) -{ - md_unit_writerexit_common(ui, 0); -} - -void * -md_io_readerlock(mdi_unit_t *ui) -{ - md_io_lock_t *io = ui->ui_io_lock; - - ASSERT(io); /* checks case where no io lock allocated */ - mutex_enter(&io->io_mx); - while (io->io_lock & (MD_UL_WRITER | MD_UL_WANABEWRITER)) { - if (panicstr) { - if (io->io_lock & MD_UL_WRITER) - panic("md: writer lock is held"); - break; - } - cv_wait(&io->io_cv, &io->io_mx); - } - io->io_readercnt++; - mutex_exit(&io->io_mx); - return (MD_UNIT(ui->ui_link.ln_id)); -} - -void * -md_io_writerlock(mdi_unit_t *ui) -{ - md_io_lock_t *io = ui->ui_io_lock; - - ASSERT(io); /* checks case where no io lock allocated */ - if (panicstr) - panic("md: writer lock not allowed"); - - mutex_enter(&io->io_mx); - while ((io->io_lock & MD_UL_WRITER) || (io->io_readercnt != 0)) { - io->io_wanabecnt++; - io->io_lock |= MD_UL_WANABEWRITER; - cv_wait(&io->io_cv, &io->io_mx); - if (--io->io_wanabecnt == 0) - io->io_lock &= ~MD_UL_WANABEWRITER; - } - io->io_lock |= MD_UL_WRITER; - io->io_owner = curthread; - - mutex_exit(&io->io_mx); - return (MD_UNIT(ui->ui_link.ln_id)); -} - -void -md_io_readerexit(mdi_unit_t *ui) -{ - md_io_lock_t *io = ui->ui_io_lock; - - mutex_enter(&io->io_mx); - ASSERT((io->io_lock & MD_UL_WRITER) == 0); - ASSERT(io->io_readercnt != 0); - io->io_readercnt--; - if ((io->io_wanabecnt != 0) && (io->io_readercnt == 0)) { - cv_broadcast(&io->io_cv); - } - mutex_exit(&io->io_mx); -} - -void -md_io_writerexit(mdi_unit_t *ui) -{ - md_io_lock_t *io = ui->ui_io_lock; - - mutex_enter(&io->io_mx); - ASSERT((io->io_lock & MD_UL_WRITER) != 0); - ASSERT(io->io_readercnt == 0); - io->io_lock &= ~MD_UL_WRITER; - io->io_owner = NULL; - - cv_broadcast(&io->io_cv); - mutex_exit(&io->io_mx); -} - -/* - * Attempt to grab that set of locks defined as global. - * A mask containing the set of global locks that are owned upon - * entry is input. Any additional global locks are then grabbed. - * This keeps the caller from having to know the set of global - * locks. - */ -static int -md_global_lock_enter(int global_locks_owned_mask) -{ - - /* - * The current implementation has been verified by inspection - * and test to be deadlock free. If another global lock is - * added, changing the algorithm used by this function should - * be considered. With more than 2 locks it is difficult to - * guarantee that locks are being acquired in the correct order. - * The safe approach would be to drop all of the locks that are - * owned at function entry and then reacquire all of the locks - * in the order defined by the lock hierarchy. - */ - mutex_enter(&md_mx); - if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) { - while ((md_mtioctl_cnt != 0) || - (md_status & MD_GBL_IOCTL_LOCK)) { - if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) { - mutex_exit(&md_mx); - return (EINTR); - } - } - md_status |= MD_GBL_IOCTL_LOCK; - md_ioctl_cnt++; - } - if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) { - while (md_status & MD_GBL_HS_LOCK) { - if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) { - md_status &= ~MD_GBL_IOCTL_LOCK; - mutex_exit(&md_mx); - return (EINTR); - } - } - md_status |= MD_GBL_HS_LOCK; - } - mutex_exit(&md_mx); - return (0); -} - -/* - * Release the set of global locks that were grabbed in md_global_lock_enter - * that were not already owned by the calling thread. The set of previously - * owned global locks is passed in as a mask parameter. - */ -static int -md_global_lock_exit(int global_locks_owned_mask, int code, - int flags, mdi_unit_t *ui) -{ - mutex_enter(&md_mx); - - /* If MT ioctl decrement mt_ioctl_cnt */ - if ((flags & MD_MT_IOCTL)) { - md_mtioctl_cnt--; - } else { - if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) { - /* clear the lock and decrement count */ - ASSERT(md_ioctl_cnt == 1); - md_ioctl_cnt--; - md_status &= ~MD_GBL_IOCTL_LOCK; - } - if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) - md_status &= ~MD_GBL_HS_LOCK; - } - if (flags & MD_READER_HELD) - md_unit_readerexit(ui); - if (flags & MD_WRITER_HELD) - md_unit_writerexit(ui); - if (flags & MD_IO_HELD) - md_io_writerexit(ui); - if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) { - rw_exit(&md_unit_array_rw.lock); - } - cv_broadcast(&md_cv); - mutex_exit(&md_mx); - - return (code); -} - -/* - * The two functions, md_ioctl_lock_enter, and md_ioctl_lock_exit make - * use of the md_global_lock_{enter|exit} functions to avoid duplication - * of code. They rely upon the fact that the locks that are specified in - * the input mask are not acquired or freed. If this algorithm changes - * as described in the block comment at the beginning of md_global_lock_enter - * then it will be necessary to change these 2 functions. Otherwise these - * functions will be grabbing and holding global locks unnecessarily. - */ -int -md_ioctl_lock_enter(void) -{ - /* grab only the ioctl lock */ - return (md_global_lock_enter(~MD_GBL_IOCTL_LOCK)); -} - -/* - * If md_ioctl_lock_exit is being called at the end of an ioctl before - * returning to user space, then ioctl_end is set to 1. - * Otherwise, the ioctl lock is being dropped in the middle of handling - * an ioctl and will be reacquired before the end of the ioctl. - * Do not attempt to process the MN diskset mddb parse flags unless - * ioctl_end is true - otherwise a deadlock situation could arise. - */ -int -md_ioctl_lock_exit(int code, int flags, mdi_unit_t *ui, int ioctl_end) -{ - int ret_val; - uint_t status; - mddb_set_t *s; - int i; - int err; - md_mn_msg_mddb_parse_t *mddb_parse_msg; - md_mn_kresult_t *kresult; - mddb_lb_t *lbp; - int rval = 1; - int flag; - - /* release only the ioctl lock */ - ret_val = md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui); - - /* - * If md_ioctl_lock_exit is being called with a possible lock held - * (ioctl_end is 0), then don't check the MN disksets since the - * call to mddb_setenter may cause a lock ordering deadlock. - */ - if (!ioctl_end) - return (ret_val); - - /* - * Walk through disksets to see if there is a MN diskset that - * has messages that need to be sent. Set must be snarfed and - * be a MN diskset in order to be checked. - * - * In a MN diskset, this routine may send messages to the - * rpc.mdcommd in order to have the slave nodes re-parse parts - * of the mddb. Messages can only be sent with no locks held, - * so if mddb change occurred while the ioctl lock is held, this - * routine must send the messages. - */ - for (i = 1; i < md_nsets; i++) { - status = md_get_setstatus(i); - - /* Set must be snarfed and be a MN diskset */ - if ((status & (MD_SET_SNARFED | MD_SET_MNSET)) != - (MD_SET_SNARFED | MD_SET_MNSET)) - continue; - - /* Grab set lock so that set can't change */ - if ((s = mddb_setenter(i, MDDB_MUSTEXIST, &err)) == NULL) - continue; - - lbp = s->s_lbp; - - /* Re-get set status now that lock is held */ - status = md_get_setstatus(i); - - /* - * If MN parsing block flag is set - continue to next set. - * - * If s_mn_parseflags_sending is non-zero, then another thread - * is already currently sending a parse message, so just - * release the set mutex. If this ioctl had caused an mddb - * change that results in a parse message to be generated, - * the thread that is currently sending a parse message would - * generate the additional parse message. - * - * If s_mn_parseflags_sending is zero then loop until - * s_mn_parseflags is 0 (until there are no more - * messages to send). - * While s_mn_parseflags is non-zero, - * put snapshot of parse_flags in s_mn_parseflags_sending - * set s_mn_parseflags to zero - * release set mutex - * send message - * re-grab set mutex - * set s_mn_parseflags_sending to zero - * - * If set is STALE, send message with NO_LOG flag so that - * rpc.mdcommd won't attempt to log message to non-writeable - * replica. - */ - mddb_parse_msg = kmem_zalloc(sizeof (md_mn_msg_mddb_parse_t), - KM_SLEEP); - while (((s->s_mn_parseflags_sending & MDDB_PARSE_MASK) == 0) && - (s->s_mn_parseflags & MDDB_PARSE_MASK) && - (!(status & MD_SET_MNPARSE_BLK))) { - - /* Grab snapshot of parse flags */ - s->s_mn_parseflags_sending = s->s_mn_parseflags; - s->s_mn_parseflags = 0; - - mutex_exit(&md_set[(s)->s_setno].s_dbmx); - - /* - * Send the message to the slaves to re-parse - * the indicated portions of the mddb. Send the status - * of the 50 mddbs in this set so that slaves know - * which mddbs that the master node thinks are 'good'. - * Otherwise, slave may reparse, but from wrong - * replica. - */ - mddb_parse_msg->msg_parse_flags = - s->s_mn_parseflags_sending; - - for (i = 0; i < MDDB_NLB; i++) { - mddb_parse_msg->msg_lb_flags[i] = - lbp->lb_locators[i].l_flags; - } - kresult = kmem_alloc(sizeof (md_mn_kresult_t), - KM_SLEEP); - while (rval != 0) { - flag = 0; - if (status & MD_SET_STALE) - flag |= MD_MSGF_NO_LOG; - rval = mdmn_ksend_message(s->s_setno, - MD_MN_MSG_MDDB_PARSE, flag, 0, - (char *)mddb_parse_msg, - sizeof (md_mn_msg_mddb_parse_t), kresult); - /* if the node hasn't yet joined, it's Ok. */ - if ((!MDMN_KSEND_MSG_OK(rval, kresult)) && - (kresult->kmmr_comm_state != - MDMNE_NOT_JOINED)) { - mdmn_ksend_show_error(rval, kresult, - "MD_MN_MSG_MDDB_PARSE"); - cmn_err(CE_WARN, "md_ioctl_lock_exit: " - "Unable to send mddb update " - "message to other nodes in " - "diskset %s\n", s->s_setname); - rval = 1; - } - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - - /* - * Re-grab mutex to clear sending field and to - * see if another parse message needs to be generated. - */ - mutex_enter(&md_set[(s)->s_setno].s_dbmx); - s->s_mn_parseflags_sending = 0; - } - kmem_free(mddb_parse_msg, sizeof (md_mn_msg_mddb_parse_t)); - mutex_exit(&md_set[(s)->s_setno].s_dbmx); - } - return (ret_val); -} - -/* - * Called when in an ioctl and need readerlock. - */ -void * -md_ioctl_readerlock(IOLOCK *lock, mdi_unit_t *ui) -{ - ASSERT(lock != NULL); - lock->l_ui = ui; - lock->l_flags |= MD_READER_HELD; - return (md_unit_readerlock_common(ui, 0)); -} - -/* - * Called when in an ioctl and need writerlock. - */ -void * -md_ioctl_writerlock(IOLOCK *lock, mdi_unit_t *ui) -{ - ASSERT(lock != NULL); - lock->l_ui = ui; - lock->l_flags |= MD_WRITER_HELD; - return (md_unit_writerlock_common(ui, 0)); -} - -void * -md_ioctl_io_lock(IOLOCK *lock, mdi_unit_t *ui) -{ - ASSERT(lock != NULL); - lock->l_ui = ui; - lock->l_flags |= MD_IO_HELD; - return (md_io_writerlock(ui)); -} - -void -md_ioctl_readerexit(IOLOCK *lock) -{ - ASSERT(lock != NULL); - lock->l_flags &= ~MD_READER_HELD; - md_unit_readerexit(lock->l_ui); -} - -void -md_ioctl_writerexit(IOLOCK *lock) -{ - ASSERT(lock != NULL); - lock->l_flags &= ~MD_WRITER_HELD; - md_unit_writerexit(lock->l_ui); -} - -void -md_ioctl_io_exit(IOLOCK *lock) -{ - ASSERT(lock != NULL); - lock->l_flags &= ~MD_IO_HELD; - md_io_writerexit(lock->l_ui); -} - -/* - * md_ioctl_releaselocks: - * -------------------- - * Release the unit locks that are held and stop subsequent - * md_unit_reader/writerlock calls from progressing. This allows the caller - * to send messages across the cluster when running in a multinode - * environment. - * ioctl originated locks (via md_ioctl_readerlock/md_ioctl_writerlock) are - * allowed to progress as normal. This is required as these typically are - * invoked by the message handler that may be called while a unit lock is - * marked as released. - * - * On entry: - * variety of unit locks may be held including ioctl lock - * - * On exit: - * locks released and unit structure updated to prevent subsequent reader/ - * writer locks being acquired until md_ioctl_reacquirelocks is called - */ -void -md_ioctl_releaselocks(int code, int flags, mdi_unit_t *ui) -{ - /* This actually releases the locks. */ - (void) md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui); -} - -/* - * md_ioctl_reacquirelocks: - * ---------------------- - * Reacquire the locks that were held when md_ioctl_releaselocks - * was called. - * - * On entry: - * No unit locks held - * On exit: - * locks held that were held at md_ioctl_releaselocks time including - * the ioctl lock. - */ -void -md_ioctl_reacquirelocks(int flags, mdi_unit_t *ui) -{ - if (flags & MD_MT_IOCTL) { - mutex_enter(&md_mx); - md_mtioctl_cnt++; - mutex_exit(&md_mx); - } else { - while (md_ioctl_lock_enter() == EINTR) - ; - } - if (flags & MD_ARRAY_WRITER) { - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - } else if (flags & MD_ARRAY_READER) { - rw_enter(&md_unit_array_rw.lock, RW_READER); - } - if (ui != (mdi_unit_t *)NULL) { - if (flags & MD_IO_HELD) { - (void) md_io_writerlock(ui); - } - - mutex_enter(&ui->ui_mx); - if (flags & MD_READER_HELD) { - (void) md_unit_readerlock_common(ui, 1); - } else if (flags & MD_WRITER_HELD) { - (void) md_unit_writerlock_common(ui, 1); - } - /* Wake up any blocked readerlock() calls */ - cv_broadcast(&ui->ui_cv); - mutex_exit(&ui->ui_mx); - } -} - -void -md_ioctl_droplocks(IOLOCK *lock) -{ - mdi_unit_t *ui; - int flags; - - ASSERT(lock != NULL); - ui = lock->l_ui; - flags = lock->l_flags; - if (flags & MD_READER_HELD) { - lock->l_flags &= ~MD_READER_HELD; - md_unit_readerexit(ui); - } - if (flags & MD_WRITER_HELD) { - lock->l_flags &= ~MD_WRITER_HELD; - md_unit_writerexit(ui); - } - if (flags & MD_IO_HELD) { - lock->l_flags &= ~MD_IO_HELD; - md_io_writerexit(ui); - } - if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) { - lock->l_flags &= ~(MD_ARRAY_WRITER | MD_ARRAY_READER); - rw_exit(&md_unit_array_rw.lock); - } -} - -void -md_array_writer(IOLOCK *lock) -{ - ASSERT(lock != NULL); - lock->l_flags |= MD_ARRAY_WRITER; - rw_enter(&md_unit_array_rw.lock, RW_WRITER); -} - -void -md_array_reader(IOLOCK *lock) -{ - ASSERT(lock != NULL); - lock->l_flags |= MD_ARRAY_READER; - rw_enter(&md_unit_array_rw.lock, RW_READER); -} - -/* - * Called when in an ioctl and need opencloselock. - * Sets flags in lockp for READER_HELD. - */ -void * -md_ioctl_openclose_enter(IOLOCK *lockp, mdi_unit_t *ui) -{ - void *un; - - ASSERT(lockp != NULL); - mutex_enter(&ui->ui_mx); - while (ui->ui_lock & MD_UL_OPENORCLOSE) - cv_wait(&ui->ui_cv, &ui->ui_mx); - ui->ui_lock |= MD_UL_OPENORCLOSE; - - /* Maintain mutex across the readerlock call */ - lockp->l_ui = ui; - lockp->l_flags |= MD_READER_HELD; - un = md_unit_readerlock_common(ui, 1); - mutex_exit(&ui->ui_mx); - - return (un); -} - -/* - * Clears reader lock using md_ioctl instead of md_unit - * and updates lockp. - */ -void -md_ioctl_openclose_exit(IOLOCK *lockp) -{ - mdi_unit_t *ui; - - ASSERT(lockp != NULL); - ui = lockp->l_ui; - ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); - - md_ioctl_readerexit(lockp); - - mutex_enter(&ui->ui_mx); - ui->ui_lock &= ~MD_UL_OPENORCLOSE; - - cv_broadcast(&ui->ui_cv); - mutex_exit(&ui->ui_mx); -} - -/* - * Clears reader lock using md_ioctl instead of md_unit - * and updates lockp. - * Does not acquire or release the ui_mx lock since the calling - * routine has already acquired this lock. - */ -void -md_ioctl_openclose_exit_lh(IOLOCK *lockp) -{ - mdi_unit_t *ui; - - ASSERT(lockp != NULL); - ui = lockp->l_ui; - ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); - - lockp->l_flags &= ~MD_READER_HELD; - md_unit_readerexit_common(lockp->l_ui, 1); - - ui->ui_lock &= ~MD_UL_OPENORCLOSE; - cv_broadcast(&ui->ui_cv); -} - -void * -md_unit_openclose_enter(mdi_unit_t *ui) -{ - void *un; - - mutex_enter(&ui->ui_mx); - while (ui->ui_lock & (MD_UL_OPENORCLOSE)) - cv_wait(&ui->ui_cv, &ui->ui_mx); - ui->ui_lock |= MD_UL_OPENORCLOSE; - - /* Maintain mutex across the readerlock call */ - un = md_unit_readerlock_common(ui, 1); - mutex_exit(&ui->ui_mx); - - return (un); -} - -void -md_unit_openclose_exit(mdi_unit_t *ui) -{ - md_unit_readerexit(ui); - - mutex_enter(&ui->ui_mx); - ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); - ui->ui_lock &= ~MD_UL_OPENORCLOSE; - - cv_broadcast(&ui->ui_cv); - mutex_exit(&ui->ui_mx); -} - -/* - * Drop the openclose and readerlocks without acquiring or - * releasing the ui_mx lock since the calling routine has - * already acquired this lock. - */ -void -md_unit_openclose_exit_lh(mdi_unit_t *ui) -{ - md_unit_readerexit_common(ui, 1); - ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); - ui->ui_lock &= ~MD_UL_OPENORCLOSE; - cv_broadcast(&ui->ui_cv); -} - -int -md_unit_isopen( - mdi_unit_t *ui -) -{ - int isopen; - - /* check status */ - mutex_enter(&ui->ui_mx); - isopen = ((ui->ui_lock & MD_UL_OPEN) ? 1 : 0); - mutex_exit(&ui->ui_mx); - return (isopen); -} - -int -md_unit_incopen( - minor_t mnum, - int flag, - int otyp -) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - int err = 0; - - /* check type and flags */ - ASSERT(ui != NULL); - mutex_enter(&ui->ui_mx); - if ((otyp < 0) || (otyp >= OTYPCNT)) { - err = EINVAL; - goto out; - } - if (((flag & FEXCL) && (ui->ui_lock & MD_UL_OPEN)) || - (ui->ui_lock & MD_UL_EXCL)) { - err = EBUSY; - goto out; - } - - /* count and flag open */ - ui->ui_ocnt[otyp]++; - ui->ui_lock |= MD_UL_OPEN; - if (flag & FEXCL) - ui->ui_lock |= MD_UL_EXCL; - - /* setup kstat, return success */ - mutex_exit(&ui->ui_mx); - md_kstat_init(mnum); - return (0); - - /* return error */ -out: - mutex_exit(&ui->ui_mx); - return (err); -} - -int -md_unit_decopen( - minor_t mnum, - int otyp -) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - int err = 0; - unsigned i; - - /* check type and flags */ - ASSERT(ui != NULL); - mutex_enter(&ui->ui_mx); - if ((otyp < 0) || (otyp >= OTYPCNT)) { - err = EINVAL; - goto out; - } else if (ui->ui_ocnt[otyp] == 0) { - err = ENXIO; - goto out; - } - - /* count and flag closed */ - if (otyp == OTYP_LYR) - ui->ui_ocnt[otyp]--; - else - ui->ui_ocnt[otyp] = 0; - ui->ui_lock &= ~MD_UL_OPEN; - for (i = 0; (i < OTYPCNT); ++i) - if (ui->ui_ocnt[i] != 0) - ui->ui_lock |= MD_UL_OPEN; - if (! (ui->ui_lock & MD_UL_OPEN)) - ui->ui_lock &= ~MD_UL_EXCL; - - /* teardown kstat, return success */ - if (! (ui->ui_lock & MD_UL_OPEN)) { - - /* - * We have a race condition inherited from specfs between - * open() and close() calls. This results in the kstat - * for a pending I/O being torn down, and then a panic. - * To avoid this, only tear the kstat down if there are - * no other readers on this device. - */ - if (ui->ui_readercnt > 1) { - mutex_exit(&ui->ui_mx); - } else { - mutex_exit(&ui->ui_mx); - md_kstat_destroy(mnum); - } - return (0); - } - - /* return success */ -out: - mutex_exit(&ui->ui_mx); - return (err); -} - -md_dev64_t -md_xlate_targ_2_mini(md_dev64_t targ_devt) -{ - dev32_t mini_32_devt, targ_32_devt; - int i; - - /* - * check to see if we're in an upgrade situation - * if we are not in upgrade just return the input device - */ - - if (!MD_UPGRADE) - return (targ_devt); - - targ_32_devt = md_cmpldev(targ_devt); - - i = 0; - while (i != md_tuple_length) { - if (md_tuple_table[i].targ_devt == targ_32_devt) { - mini_32_devt = md_tuple_table[i].mini_devt; - return (md_expldev((md_dev64_t)mini_32_devt)); - } - i++; - } - return (NODEV64); -} - -md_dev64_t -md_xlate_mini_2_targ(md_dev64_t mini_devt) -{ - dev32_t mini_32_devt, targ_32_devt; - int i; - - if (!MD_UPGRADE) - return (mini_devt); - - mini_32_devt = md_cmpldev(mini_devt); - - i = 0; - while (i != md_tuple_length) { - if (md_tuple_table[i].mini_devt == mini_32_devt) { - targ_32_devt = md_tuple_table[i].targ_devt; - return (md_expldev((md_dev64_t)targ_32_devt)); - } - i++; - } - return (NODEV64); -} - -void -md_xlate_free(int size) -{ - kmem_free(md_tuple_table, size); -} - -char * -md_targ_major_to_name(major_t maj) -{ - char *drv_name = NULL; - int i; - - if (!MD_UPGRADE) - return (ddi_major_to_name(maj)); - - for (i = 0; i < md_majortab_len; i++) { - if (md_major_tuple_table[i].targ_maj == maj) { - drv_name = md_major_tuple_table[i].drv_name; - break; - } - } - return (drv_name); -} - -major_t -md_targ_name_to_major(char *drv_name) -{ - major_t maj; - int i; - - maj = md_getmajor(NODEV64); - if (!MD_UPGRADE) - return (ddi_name_to_major(drv_name)); - - for (i = 0; i < md_majortab_len; i++) { - if ((strcmp(md_major_tuple_table[i].drv_name, - drv_name)) == 0) { - maj = md_major_tuple_table[i].targ_maj; - break; - } - } - - return (maj); -} - -void -md_majortab_free() -{ - size_t sz; - int i; - - for (i = 0; i < md_majortab_len; i++) { - freestr(md_major_tuple_table[i].drv_name); - } - - sz = md_majortab_len * sizeof (struct md_xlate_major_table); - kmem_free(md_major_tuple_table, sz); -} - -/* functions return a pointer to a function which returns an int */ - -intptr_t (* -md_get_named_service(md_dev64_t dev, int modindex, char *name, - intptr_t (*Default)()))() -{ - mdi_unit_t *ui; - md_named_services_t *sp; - int i; - - /* - * Return the first named service found. - * Use this path when it is known that there is only - * one named service possible (e.g., hotspare interface) - */ - if ((dev == NODEV64) && (modindex == ANY_SERVICE)) { - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) { - continue; - } - sp = md_ops[i]->md_services; - if (sp == NULL) - continue; - while (sp->md_service != NULL) { - if (strcmp(name, sp->md_name) == 0) - return (sp->md_service); - sp++; - } - } - return (Default); - } - - /* - * Return the named service for the given modindex. - * This is used if there are multiple possible named services - * and each one needs to be called (e.g., poke hotspares) - */ - if (dev == NODEV64) { - if (modindex >= MD_NOPS) - return (Default); - - if (md_ops[modindex] == NULL) - return (Default); - - sp = md_ops[modindex]->md_services; - if (sp == NULL) - return (Default); - - while (sp->md_service != NULL) { - if (strcmp(name, sp->md_name) == 0) - return (sp->md_service); - sp++; - } - return (Default); - } - - /* - * Return the named service for this md_dev64_t - */ - if (md_getmajor(dev) != md_major) - return (Default); - - if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) || - (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits)) - return (NULL); - - - if ((ui = MDI_UNIT(md_getminor(dev))) == NULL) - return (NULL); - - sp = md_ops[ui->ui_opsindex]->md_services; - if (sp == NULL) - return (Default); - while (sp->md_service != NULL) { - if (strcmp(name, sp->md_name) == 0) - return (sp->md_service); - sp++; - } - return (Default); -} - -/* - * md_daemon callback routine - */ -boolean_t -callb_md_cpr(void *arg, int code) -{ - callb_cpr_t *cp = (callb_cpr_t *)arg; - int ret = 0; /* assume success */ - clock_t delta; - - mutex_enter(cp->cc_lockp); - - switch (code) { - case CB_CODE_CPR_CHKPT: - /* - * Check for active resync threads - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - if ((md_cpr_resync.md_mirror_resync > 0) || - (md_cpr_resync.md_raid_resync > 0)) { - mutex_exit(&md_cpr_resync.md_resync_mutex); - cmn_err(CE_WARN, "There are Solaris Volume Manager " - "synchronization threads running."); - cmn_err(CE_WARN, "Please try system suspension at " - "a later time."); - ret = -1; - break; - } - mutex_exit(&md_cpr_resync.md_resync_mutex); - - cp->cc_events |= CALLB_CPR_START; - delta = CPR_KTHREAD_TIMEOUT_SEC * hz; - while (!(cp->cc_events & CALLB_CPR_SAFE)) - /* cv_reltimedwait() returns -1 if it times out. */ - if ((ret = cv_reltimedwait(&cp->cc_callb_cv, - cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1) - break; - break; - - case CB_CODE_CPR_RESUME: - cp->cc_events &= ~CALLB_CPR_START; - cv_signal(&cp->cc_stop_cv); - break; - } - mutex_exit(cp->cc_lockp); - return (ret != -1); -} - -void -md_daemon(int pass_thru, mdq_anchor_t *anchor) -{ - daemon_queue_t *dq; - callb_cpr_t cprinfo; - - if (pass_thru && (md_get_status() & MD_GBL_DAEMONS_LIVE)) - return; - /* - * Register cpr callback - */ - CALLB_CPR_INIT(&cprinfo, &anchor->a_mx, callb_md_cpr, "md_daemon"); - - /*CONSTCOND*/ - while (1) { - mutex_enter(&anchor->a_mx); - while ((dq = anchor->dq.dq_next) == &(anchor->dq)) { - if (pass_thru) { - /* - * CALLB_CPR_EXIT Will do - * mutex_exit(&anchor->a_mx) - */ - CALLB_CPR_EXIT(&cprinfo); - return; - } - if (md_get_status() & MD_GBL_DAEMONS_DIE) { - mutex_exit(&anchor->a_mx); - mutex_enter(&md_mx); - md_num_daemons--; - mutex_exit(&md_mx); - /* - * CALLB_CPR_EXIT will do - * mutex_exit(&anchor->a_mx) - */ - mutex_enter(&anchor->a_mx); - CALLB_CPR_EXIT(&cprinfo); - thread_exit(); - } - CALLB_CPR_SAFE_BEGIN(&cprinfo); - cv_wait(&anchor->a_cv, &anchor->a_mx); - CALLB_CPR_SAFE_END(&cprinfo, &anchor->a_mx); - } - dq->dq_prev->dq_next = dq->dq_next; - dq->dq_next->dq_prev = dq->dq_prev; - dq->dq_prev = dq->dq_next = NULL; - anchor->dq.qlen--; - mutex_exit(&anchor->a_mx); - (*(dq->dq_call))(dq); - } - /*NOTREACHED*/ -} - -/* - * daemon_request: - * - * Adds requests to appropriate requestq which is - * anchored by *anchor. - * The request is the first element of a doubly linked circular list. - * When the request is a single element, the forward and backward - * pointers MUST point to the element itself. - */ - -void -daemon_request(mdq_anchor_t *anchor, void (*func)(), - daemon_queue_t *request, callstyle_t style) -{ - daemon_queue_t *rqtp; - int i = 0; - - rqtp = request; - if (style == REQ_OLD) { - ASSERT((rqtp->dq_next == NULL) && (rqtp->dq_prev == NULL)); - /* set it to the new style */ - rqtp->dq_prev = rqtp->dq_next = rqtp; - } - ASSERT((rqtp->dq_next != NULL) && (rqtp->dq_prev != NULL)); - - /* scan the list and add the function to each element */ - - do { - rqtp->dq_call = func; - i++; - rqtp = rqtp->dq_next; - } while (rqtp != request); - - /* save pointer to tail of the request list */ - rqtp = request->dq_prev; - - mutex_enter(&anchor->a_mx); - /* stats */ - anchor->dq.qlen += i; - anchor->dq.treqs += i; - anchor->dq.maxq_len = (anchor->dq.qlen > anchor->dq.maxq_len) ? - anchor->dq.qlen : anchor->dq.maxq_len; - - /* now add the list to request queue */ - request->dq_prev = anchor->dq.dq_prev; - rqtp->dq_next = &anchor->dq; - anchor->dq.dq_prev->dq_next = request; - anchor->dq.dq_prev = rqtp; - cv_broadcast(&anchor->a_cv); - mutex_exit(&anchor->a_mx); -} - -void -mddb_commitrec_wrapper(mddb_recid_t recid) -{ - int sent_log = 0; - uint_t retry = md_retry_cnt; - set_t setno; - - while (mddb_commitrec(recid)) { - if (! sent_log) { - cmn_err(CE_WARN, - "md: state database commit failed"); - sent_log = 1; - } - delay(md_hz); - - /* - * Setting retry cnt to one (pre decremented) so that we - * actually do no retries when committing/deleting a mddb rec. - * The underlying disk driver does several retries to check - * if the disk is really dead or not so there - * is no reason for us to retry on top of the drivers retries. - */ - - if (--retry == 0) { - setno = mddb_getsetnum(recid); - if (md_get_setstatus(setno) & MD_SET_TOOFEW) { - panic( - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } else { - panic("md: state database problem"); - } - /*NOTREACHED*/ - } - } -} - -void -mddb_commitrecs_wrapper(mddb_recid_t *recids) -{ - int sent_log = 0; - uint_t retry = md_retry_cnt; - set_t setno; - - while (mddb_commitrecs(recids)) { - if (! sent_log) { - cmn_err(CE_WARN, - "md: state database commit failed"); - sent_log = 1; - } - delay(md_hz); - - /* - * Setting retry cnt to one (pre decremented) so that we - * actually do no retries when committing/deleting a mddb rec. - * The underlying disk driver does several retries to check - * if the disk is really dead or not so there - * is no reason for us to retry on top of the drivers retries. - */ - - if (--retry == 0) { - /* - * since all the records are part of the same set - * use the first one to get setno - */ - setno = mddb_getsetnum(*recids); - if (md_get_setstatus(setno) & MD_SET_TOOFEW) { - panic( - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } else { - panic("md: state database problem"); - } - /*NOTREACHED*/ - } - } -} - -void -mddb_deleterec_wrapper(mddb_recid_t recid) -{ - int sent_log = 0; - uint_t retry = md_retry_cnt; - set_t setno; - - while (mddb_deleterec(recid)) { - if (! sent_log) { - cmn_err(CE_WARN, - "md: state database delete failed"); - sent_log = 1; - } - delay(md_hz); - - /* - * Setting retry cnt to one (pre decremented) so that we - * actually do no retries when committing/deleting a mddb rec. - * The underlying disk driver does several retries to check - * if the disk is really dead or not so there - * is no reason for us to retry on top of the drivers retries. - */ - - if (--retry == 0) { - setno = mddb_getsetnum(recid); - if (md_get_setstatus(setno) & MD_SET_TOOFEW) { - panic( - "md: Panic due to lack of DiskSuite state\n" - " database replicas. Fewer than 50%% of " - "the total were available,\n so panic to " - "ensure data integrity."); - } else { - panic("md: state database problem"); - } - /*NOTREACHED*/ - } - } -} - -/* - * md_holdset_enter is called in order to hold the set in its - * current state (loaded, unloaded, snarfed, unsnarfed, etc) - * until md_holdset_exit is called. This is used by the mirror - * code to mark the set as HOLD so that the set won't be - * unloaded while hotspares are being allocated in check_4_hotspares. - * The original fix to the mirror code to hold the set was to call - * md_haltsnarf_enter, but this will block all ioctls and ioctls - * must work for a MN diskset while hotspares are allocated. - */ -void -md_holdset_enter(set_t setno) -{ - mutex_enter(&md_mx); - while (md_set[setno].s_status & MD_SET_HOLD) - cv_wait(&md_cv, &md_mx); - md_set[setno].s_status |= MD_SET_HOLD; - mutex_exit(&md_mx); -} - -void -md_holdset_exit(set_t setno) -{ - mutex_enter(&md_mx); - md_set[setno].s_status &= ~MD_SET_HOLD; - cv_broadcast(&md_cv); - mutex_exit(&md_mx); -} - -/* - * Returns a 0 if this thread marked the set as HOLD (success), - * returns a -1 if set was already marked HOLD (failure). - * Used by the release_set code to see if set is marked HOLD. - * HOLD is set by a daemon when hotspares are being allocated - * to mirror units. - */ -int -md_holdset_testandenter(set_t setno) -{ - mutex_enter(&md_mx); - if (md_set[setno].s_status & MD_SET_HOLD) { - mutex_exit(&md_mx); - return (-1); - } - md_set[setno].s_status |= MD_SET_HOLD; - mutex_exit(&md_mx); - return (0); -} - -void -md_haltsnarf_enter(set_t setno) -{ - mutex_enter(&md_mx); - while (md_set[setno].s_status & MD_SET_SNARFING) - cv_wait(&md_cv, &md_mx); - - md_set[setno].s_status |= MD_SET_SNARFING; - mutex_exit(&md_mx); -} - -void -md_haltsnarf_exit(set_t setno) -{ - mutex_enter(&md_mx); - md_set[setno].s_status &= ~MD_SET_SNARFING; - cv_broadcast(&md_cv); - mutex_exit(&md_mx); -} - -void -md_haltsnarf_wait(set_t setno) -{ - mutex_enter(&md_mx); - while (md_set[setno].s_status & MD_SET_SNARFING) - cv_wait(&md_cv, &md_mx); - mutex_exit(&md_mx); -} - -/* - * ASSUMED that the md_unit_array_rw WRITER lock is held. - */ -int -md_halt_set(set_t setno, enum md_haltcmd cmd) -{ - int i, err; - - if (md_set[setno].s_un == NULL || md_set[setno].s_ui == NULL) { - return (0); - } - - if ((cmd == MD_HALT_CHECK) || (cmd == MD_HALT_ALL)) { - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) - continue; - if ((*(md_ops[i]->md_halt))(MD_HALT_CLOSE, setno)) { - for (--i; i > 0; --i) { - if (md_ops[i] == NULL) - continue; - (void) (*(md_ops[i]->md_halt)) - (MD_HALT_OPEN, setno); - } - return (EBUSY); - } - } - - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) - continue; - if ((*(md_ops[i]->md_halt))(MD_HALT_CHECK, setno)) { - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) - continue; - (void) (*(md_ops[i]->md_halt)) - (MD_HALT_OPEN, setno); - } - return (EBUSY); - } - } - } - - if ((cmd == MD_HALT_DOIT) || (cmd == MD_HALT_ALL)) { - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) - continue; - err = (*(md_ops[i]->md_halt))(MD_HALT_DOIT, setno); - if (err != 0) - cmn_err(CE_NOTE, - "md: halt failed for %s, error %d", - md_ops[i]->md_driver.md_drivername, err); - } - - /* - * Unload the devid namespace if it is loaded - */ - md_unload_namespace(setno, NM_DEVID); - md_unload_namespace(setno, 0L); - md_clr_setstatus(setno, MD_SET_SNARFED); - } - - return (0); -} - -int -md_halt(int global_locks_owned_mask) -{ - set_t i, j; - int err; - int init_queues; - md_requestq_entry_t *rqp; - md_ops_t **pops, *ops, *lops; - ddi_modhandle_t mod; - char *name; - - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - - /* - * Grab the all of the global locks that are not - * already owned to ensure that there isn't another - * thread trying to access a global resource - * while the halt is in progress - */ - if (md_global_lock_enter(global_locks_owned_mask) == EINTR) - return (EINTR); - - for (i = 0; i < md_nsets; i++) - md_haltsnarf_enter(i); - - /* - * Kill the daemon threads. - */ - init_queues = ((md_get_status() & MD_GBL_DAEMONS_LIVE) ? FALSE : TRUE); - md_clr_status(MD_GBL_DAEMONS_LIVE); - md_set_status(MD_GBL_DAEMONS_DIE); - - rqp = &md_daemon_queues[0]; - i = 0; - while (!NULL_REQUESTQ_ENTRY(rqp)) { - cv_broadcast(&rqp->dispq_headp->a_cv); - rqp = &md_daemon_queues[++i]; - } - - mutex_enter(&md_mx); - while (md_num_daemons != 0) { - mutex_exit(&md_mx); - delay(md_hz); - mutex_enter(&md_mx); - } - mutex_exit(&md_mx); - md_clr_status(MD_GBL_DAEMONS_DIE); - - for (i = 0; i < md_nsets; i++) - /* - * Only call into md_halt_set if s_un / s_ui are both set. - * If they are NULL this set hasn't been accessed, so its - * pointless performing the call. - */ - if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) { - if (md_halt_set(i, MD_HALT_CHECK)) { - if (md_start_daemons(init_queues)) - cmn_err(CE_WARN, - "md: restart of daemon threads " - "failed"); - for (j = 0; j < md_nsets; j++) - md_haltsnarf_exit(j); - - return (md_global_lock_exit( - global_locks_owned_mask, EBUSY, - MD_ARRAY_WRITER, NULL)); - } - } - - /* - * if we get here we are going to do it - */ - for (i = 0; i < md_nsets; i++) { - /* - * Only call into md_halt_set if s_un / s_ui are both set. - * If they are NULL this set hasn't been accessed, so its - * pointless performing the call. - */ - if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) { - err = md_halt_set(i, MD_HALT_DOIT); - if (err != 0) - cmn_err(CE_NOTE, - "md: halt failed set %u, error %d", - (unsigned)i, err); - } - } - - /* - * issue a halt unload to each module to indicate that it - * is about to be unloaded. Each module is called once, set - * has no meaning at this point in time. - */ - for (i = 0; i < MD_NOPS; i++) { - if (md_ops[i] == NULL) - continue; - err = (*(md_ops[i]->md_halt))(MD_HALT_UNLOAD, 0); - if (err != 0) - cmn_err(CE_NOTE, - "md: halt failed for %s, error %d", - md_ops[i]->md_driver.md_drivername, err); - } - - /* ddi_modclose the submodules */ - for (i = 0; i < MD_NOPS; i++) { - /* skip if not open */ - if ((md_ops[i] == NULL) || (md_mods[i] == NULL)) - continue; - - /* find and unlink from md_opslist */ - ops = md_ops[i]; - mod = md_mods[i]; - pops = &md_opslist; - for (lops = *pops; lops; - pops = &lops->md_next, lops = *pops) { - if (lops == ops) { - *pops = ops->md_next; - ops->md_next = NULL; - break; - } - } - - /* uninitialize */ - name = ops->md_driver.md_drivername; - md_ops[i] = NULL; - md_mods[i] = NULL; - ops->md_selfindex = 0; - ops->md_driver.md_drivername[0] = '\0'; - rw_destroy(&ops->md_link_rw.lock); - - /* close */ - err = ddi_modclose(mod); - if (err != 0) - cmn_err(CE_NOTE, - "md: halt close failed for %s, error %d", - name ? name : "UNKNOWN", err); - } - - /* Unload the database */ - mddb_unload(); - - md_set_status(MD_GBL_HALTED); /* we are ready to be unloaded */ - - for (i = 0; i < md_nsets; i++) - md_haltsnarf_exit(i); - - return (md_global_lock_exit(global_locks_owned_mask, 0, - MD_ARRAY_WRITER, NULL)); -} - -/* - * md_layered_open() is an internal routine only for SVM modules. - * So the input device will be a md_dev64_t, because all SVM modules internally - * work with that device type. - * ddi routines on the other hand work with dev_t. So, if we call any ddi - * routines from here we first have to convert that device into a dev_t. - */ - -int -md_layered_open( - minor_t mnum, - md_dev64_t *dev, - int md_oflags -) -{ - int flag = (FREAD | FWRITE); - cred_t *cred_p = kcred; - major_t major; - int err; - dev_t ddi_dev = md_dev64_to_dev(*dev); - - if (ddi_dev == NODEV) - return (ENODEV); - - major = getmajor(ddi_dev); - - /* metadevice */ - if (major == md_major) { - mdi_unit_t *ui; - - /* open underlying driver */ - mnum = getminor(ddi_dev); - - ui = MDI_UNIT(mnum); - if (md_ops[ui->ui_opsindex]->md_open != NULL) { - int ret = (*md_ops[ui->ui_opsindex]->md_open)(&ddi_dev, - flag, OTYP_LYR, cred_p, md_oflags); - /* - * As open() may change the device, - * send this info back to the caller. - */ - *dev = md_expldev(ddi_dev); - return (ret); - } - - /* or do it ourselves */ - (void) md_unit_openclose_enter(ui); - err = md_unit_incopen(mnum, flag, OTYP_LYR); - md_unit_openclose_exit(ui); - /* convert our ddi_dev back to the dev we were given */ - *dev = md_expldev(ddi_dev); - return (err); - } - - /* - * Open regular device, since open() may change dev_t give new dev_t - * back to the caller. - */ - err = dev_lopen(&ddi_dev, flag, OTYP_LYR, cred_p); - *dev = md_expldev(ddi_dev); - return (err); -} - -/* - * md_layered_close() is an internal routine only for SVM modules. - * So the input device will be a md_dev64_t, because all SVM modules internally - * work with that device type. - * ddi routines on the other hand work with dev_t. So, if we call any ddi - * routines from here we first have to convert that device into a dev_t. - */ -void -md_layered_close( - md_dev64_t dev, - int md_cflags -) -{ - int flag = (FREAD | FWRITE); - cred_t *cred_p = kcred; - dev_t ddi_dev = md_dev64_to_dev(dev); - major_t major = getmajor(ddi_dev); - minor_t mnum = getminor(ddi_dev); - - /* metadevice */ - if (major == md_major) { - mdi_unit_t *ui = MDI_UNIT(mnum); - - /* close underlying driver */ - if (md_ops[ui->ui_opsindex]->md_close != NULL) { - (*md_ops[ui->ui_opsindex]->md_close) - (ddi_dev, flag, OTYP_LYR, cred_p, md_cflags); - return; - } - - /* or do it ourselves */ - (void) md_unit_openclose_enter(ui); - (void) md_unit_decopen(mnum, OTYP_LYR); - md_unit_openclose_exit(ui); - return; - } - - /* close regular device */ - (void) dev_lclose(ddi_dev, flag, OTYP_LYR, cred_p); -} - -/* - * saves a little code in mdstrategy - */ -int -errdone(mdi_unit_t *ui, struct buf *bp, int err) -{ - if ((bp->b_error = err) != 0) - bp->b_flags |= B_ERROR; - else - bp->b_resid = bp->b_bcount; - md_unit_readerexit(ui); - md_biodone(bp); - return (1); -} - -static int md_write_label = 0; - -int -md_checkbuf(mdi_unit_t *ui, md_unit_t *un, buf_t *bp) -{ - diskaddr_t endblk; - set_t setno = MD_UN2SET(un); - - if ((md_get_setstatus(setno) & MD_SET_STALE) && - (! (bp->b_flags & B_READ))) - return (errdone(ui, bp, EROFS)); - /* - * Check early for unreasonable block number. - * - * b_blkno is defined as adaddr_t which is typedef'd to a long. - * A problem occurs if b_blkno has bit 31 set and un_total_blocks - * doesn't, b_blkno is then compared as a negative number which is - * always less than a positive. - */ - if ((u_longlong_t)bp->b_lblkno > (u_longlong_t)un->c.un_total_blocks) - return (errdone(ui, bp, EINVAL)); - - if (bp->b_lblkno == un->c.un_total_blocks) - return (errdone(ui, bp, 0)); - - /* - * make sure we don't clobber any labels - */ - if ((bp->b_lblkno == 0) && (! (bp->b_flags & B_READ)) && - (un->c.un_flag & MD_LABELED) && (! md_write_label)) { - cmn_err(CE_NOTE, "md: %s: write to label", - md_shortname(getminor(bp->b_edev))); - return (errdone(ui, bp, EINVAL)); - } - - bp->b_resid = 0; - endblk = (diskaddr_t)(bp->b_lblkno + - howmany(bp->b_bcount, DEV_BSIZE) - 1); - - if (endblk > (un->c.un_total_blocks - 1)) { - bp->b_resid = dbtob(endblk - (un->c.un_total_blocks - 1)); - endblk = un->c.un_total_blocks - 1; - bp->b_bcount -= bp->b_resid; - } - return (0); -} - -/* - * init_request_queue: initializes the request queues and creates the threads. - * return value = 0 :invalid num_threads - * = n : n is the number of threads created. - */ - -int -init_requestq( - md_requestq_entry_t *rq, /* request queue info */ - void (*threadfn)(), /* function to start the thread */ - caddr_t threadfn_args, /* args to the function */ - int pri, /* thread priority */ - int init_queue) /* flag to init queues */ -{ - struct mdq_anchor *rqhead; - int i; - int num_threads; - - - num_threads = *(rq->num_threadsp); - rqhead = rq->dispq_headp; - - if (NULL_REQUESTQ_ENTRY(rq) || num_threads == 0) - return (0); - - if (init_queue) { - rqhead->dq.maxq_len = 0; - rqhead->dq.treqs = 0; - rqhead->dq.dq_next = &rqhead->dq; - rqhead->dq.dq_prev = &rqhead->dq; - cv_init(&rqhead->a_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&rqhead->a_mx, NULL, MUTEX_DEFAULT, NULL); - } - for (i = 0; i < num_threads; i++) { - (void) thread_create(NULL, 0, threadfn, threadfn_args, 0, &p0, - TS_RUN, pri); - } - return (i); -} - -static void -start_daemon(struct mdq_anchor *q) -{ - md_daemon(0, q); - ASSERT(0); -} - -/* - * Creates all the md daemons. - * Global: - * md_num_daemons is set to number of daemons. - * MD_GBL_DAEMONS_LIVE flag set to indicate the daemons are active. - * - * Return value: 0 success - * 1 failure - */ -int -md_start_daemons(int init_queue) -{ - md_requestq_entry_t *rqp; - int cnt; - int i; - int retval = 0; - - - if (md_get_status() & MD_GBL_DAEMONS_LIVE) { - return (retval); - } - md_clr_status(MD_GBL_DAEMONS_DIE); - - rqp = &md_daemon_queues[0]; - i = 0; - while (!NULL_REQUESTQ_ENTRY(rqp)) { - cnt = init_requestq(rqp, start_daemon, - (caddr_t)rqp->dispq_headp, minclsyspri, init_queue); - - if (cnt && cnt != *rqp->num_threadsp) { - retval = 1; - break; - } - /* - * initialize variables - */ - md_num_daemons += cnt; - rqp = &md_daemon_queues[++i]; - } - - md_set_status(MD_GBL_DAEMONS_LIVE); - return (retval); -} - -int -md_loadsubmod(set_t setno, char *name, int drvrid) -{ - ddi_modhandle_t mod; - md_ops_t **pops, *ops; - int i, err; - - /* - * See if the submodule is mdopened. If not, i is the index of the - * next empty slot. - */ - for (i = 0; md_ops[i] != NULL; i++) { - if (strncmp(name, md_ops[i]->md_driver.md_drivername, - MD_DRIVERNAMELEN) == 0) - return (i); - - if (i == (MD_NOPS - 1)) - return (-1); - } - - if (drvrid < 0) { - /* Do not try to add any records to the DB when stale. */ - if (md_get_setstatus(setno) & MD_SET_STALE) - return (-1); - drvrid = md_setshared_name(setno, name, 0L); - } - - if (drvrid < 0) - return (-1); - - /* open and import the md_ops of the submodules */ - mod = ddi_modopen(name, KRTLD_MODE_FIRST, &err); - if (mod == NULL) { - cmn_err(CE_WARN, "md_loadsubmod: " - "unable to ddi_modopen %s, error %d\n", name, err); - return (-1); - } - pops = ddi_modsym(mod, "md_interface_ops", &err); - if (pops == NULL) { - cmn_err(CE_WARN, "md_loadsubmod: " - "unable to import md_interface_ops from %s, error %d\n", - name, err); - (void) ddi_modclose(mod); - return (-1); - } - - /* ddi_modsym returns pointer to md_interface_ops in submod */ - ops = *pops; - - /* initialize */ - ops->md_selfindex = i; - rw_init(&ops->md_link_rw.lock, NULL, RW_DEFAULT, NULL); - (void) strncpy(ops->md_driver.md_drivername, name, - MD_DRIVERNAMELEN); - - /* plumb */ - md_ops[i] = ops; - md_mods[i] = mod; - ops->md_next = md_opslist; - md_opslist = ops; - - /* return index */ - return (i); -} - -int -md_getmodindex(md_driver_t *driver, int dont_load, int db_notrequired) -{ - int i; - int modindex; - char *name = driver->md_drivername; - set_t setno = driver->md_setno; - int drvid; - int local_dont_load; - - if (setno >= md_nsets) - return (-1); - - for (i = 0; name[i] != 0; i++) - if (i == (MD_DRIVERNAMELEN -1)) - return (-1); - - /* - * If set is STALE, set local_dont_load to 1 since no records - * should be added to DB when stale. - */ - if (md_get_setstatus(setno) & MD_SET_STALE) { - local_dont_load = 1; - } else { - local_dont_load = dont_load; - } - - /* - * Single thread ioctl module binding with respect to - * similar code executed in md_loadsubmod that is called - * from md_snarf_db_set (which is where that path does - * its md_haltsnarf_enter call). - */ - md_haltsnarf_enter(setno); - - /* See if the submodule is already ddi_modopened. */ - for (i = 0; md_ops[i] != NULL; i++) { - if (strncmp(name, md_ops[i]->md_driver.md_drivername, - MD_DRIVERNAMELEN) == 0) { - if (! local_dont_load && - (md_getshared_key(setno, name) == MD_KEYBAD)) { - if (md_setshared_name(setno, name, 0L) - == MD_KEYBAD) { - if (!db_notrequired) - goto err; - } - } - md_haltsnarf_exit(setno); - return (i); - } - - if (i == (MD_NOPS -1)) - break; - } - - if (local_dont_load) - goto err; - - drvid = ((db_notrequired) ? 0 : (int)md_getshared_key(setno, name)); - - /* ddi_modopen the submodule */ - modindex = md_loadsubmod(setno, name, drvid); - if (modindex < 0) - goto err; - - if (md_ops[modindex]->md_snarf != NULL) - (*(md_ops[modindex]->md_snarf))(MD_SNARF_DOIT, setno); - - md_haltsnarf_exit(setno); - return (modindex); - -err: md_haltsnarf_exit(setno); - return (-1); -} - -void -md_call_strategy(buf_t *bp, int flags, void *private) -{ - mdi_unit_t *ui; - - if (mdv_strategy_tstpnt) - if ((*mdv_strategy_tstpnt)(bp, flags, private) != 0) - return; - if (getmajor(bp->b_edev) != md_major) { - (void) bdev_strategy(bp); - return; - } - - flags = (flags & MD_STR_PASSEDON) | MD_STR_NOTTOP; - ui = MDI_UNIT(getminor(bp->b_edev)); - ASSERT(ui != NULL); - (*md_ops[ui->ui_opsindex]->md_strategy)(bp, flags, private); -} - -/* - * md_call_ioctl: - * ------------- - * Issue the specified ioctl to the device associated with the given md_dev64_t - * - * Arguments: - * dev - underlying device [md_dev64_t] - * cmd - ioctl to perform - * data - arguments / result location - * mode - read/write/layered ioctl - * lockp - lock reference - * - * Returns: - * 0 success - * !=0 Failure (error code) - */ -int -md_call_ioctl(md_dev64_t dev, int cmd, void *data, int mode, IOLOCK *lockp) -{ - dev_t device = md_dev64_to_dev(dev); - int rval; - mdi_unit_t *ui; - - /* - * See if device is a metadevice. If not call cdev_ioctl(), otherwise - * call the ioctl entry-point in the metadevice. - */ - if (md_getmajor(dev) != md_major) { - int rv; - rval = cdev_ioctl(device, cmd, (intptr_t)data, mode, - ddi_get_cred(), &rv); - } else { - ui = MDI_UNIT(md_getminor(dev)); - ASSERT(ui != NULL); - rval = (*md_ops[ui->ui_opsindex]->md_ioctl)(device, cmd, data, - mode, lockp); - } - return (rval); -} - -void -md_rem_link(set_t setno, int id, krwlock_t *rw, md_link_t **head) -{ - md_link_t *next; - md_link_t **pprev; - - rw_enter(rw, RW_WRITER); - - next = *head; - pprev = head; - while (next) { - if ((next->ln_setno == setno) && (next->ln_id == id)) { - *pprev = next->ln_next; - rw_exit(rw); - return; - } - pprev = &next->ln_next; - next = next->ln_next; - } - - rw_exit(rw); -} - -int -md_dev_exists(md_dev64_t dev) -{ - - if (dev == NODEV64) - return (0); - - if (strcmp(ddi_major_to_name(md_getmajor(dev)), "md") != 0) - return (1); - - if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) || - (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits)) - return (0); - - if (MDI_UNIT(md_getminor(dev)) != NULL) - return (1); - - return (0); -} - -md_parent_t -md_get_parent(md_dev64_t dev) -{ - md_unit_t *un; - mdi_unit_t *ui; - md_parent_t parent; - - if (md_getmajor(dev) != md_major) - return (MD_NO_PARENT); - - ui = MDI_UNIT(md_getminor(dev)); - - un = (md_unit_t *)md_unit_readerlock(ui); - parent = un->c.un_parent; - md_unit_readerexit(ui); - - return (parent); -} - -void -md_set_parent(md_dev64_t dev, md_parent_t parent) -{ - md_unit_t *un; - mdi_unit_t *ui; - - if (md_getmajor(dev) != md_major) - return; - - ui = MDI_UNIT(md_getminor(dev)); - - un = (md_unit_t *)md_unit_readerlock(ui); - un->c.un_parent = parent; - md_unit_readerexit(ui); -} - -void -md_reset_parent(md_dev64_t dev) -{ - md_unit_t *un; - mdi_unit_t *ui; - - if (md_getmajor(dev) != md_major) - return; - - ui = MDI_UNIT(md_getminor(dev)); - - un = (md_unit_t *)md_unit_readerlock(ui); - un->c.un_parent = MD_NO_PARENT; - md_unit_readerexit(ui); -} - - -static intptr_t (*hot_spare_interface)() = (intptr_t (*)())NULL; - -int -md_hot_spare_ifc( - hs_cmds_t cmd, - mddb_recid_t id, - u_longlong_t size, - int labeled, - mddb_recid_t *hs_id, - mdkey_t *key, - md_dev64_t *dev, - diskaddr_t *sblock) -{ - int err; - - /* - * RW lock on hot_spare_interface. We don't want it to change from - * underneath us. If hot_spare_interface is NULL we're going to - * need to set it. So we need to upgrade to a WRITER lock. If that - * doesn't work, we drop the lock and reenter as WRITER. This leaves - * a small hole during which hot_spare_interface could be modified - * so we check it for NULL again. What a pain. Then if still null - * load from md_get_named_service. - */ - - rw_enter(&hsp_rwlp.lock, RW_READER); - if (hot_spare_interface == NULL) { - if (rw_tryupgrade(&hsp_rwlp.lock) == 0) { - rw_exit(&hsp_rwlp.lock); - rw_enter(&hsp_rwlp.lock, RW_WRITER); - if (hot_spare_interface != NULL) { - err = ((*hot_spare_interface) - (cmd, id, size, labeled, hs_id, key, dev, - sblock)); - rw_exit(&hsp_rwlp.lock); - return (err); - } - } - hot_spare_interface = md_get_named_service(NODEV64, ANY_SERVICE, - "hot spare interface", 0); - rw_downgrade(&hsp_rwlp.lock); - } - - if (hot_spare_interface == NULL) { - cmn_err(CE_WARN, "md: no hotspare interface"); - rw_exit(&hsp_rwlp.lock); - return (0); - } - - err = ((*hot_spare_interface) - (cmd, id, size, labeled, hs_id, key, dev, sblock)); - rw_exit(&hsp_rwlp.lock); - return (err); -} - -void -md_clear_hot_spare_interface() -{ - rw_enter(&hsp_rwlp.lock, RW_WRITER); - hot_spare_interface = NULL; - rw_exit(&hsp_rwlp.lock); -} - - -static intptr_t (*notify_interface)() = (intptr_t (*)())NULL; - -int -md_notify_interface( - md_event_cmds_t cmd, - md_tags_t tag, - set_t set, - md_dev64_t dev, - md_event_type_t event -) -{ - int err; - - if (md_event_queue == NULL) - return (0); - rw_enter(&ni_rwlp.lock, RW_READER); - if (notify_interface == NULL) { - if (rw_tryupgrade(&ni_rwlp.lock) == 0) { - rw_exit(&ni_rwlp.lock); - rw_enter(&ni_rwlp.lock, RW_WRITER); - if (notify_interface != NULL) { - err = ((*notify_interface) - (cmd, tag, set, dev, event)); - rw_exit(&ni_rwlp.lock); - return (err); - } - } - notify_interface = md_get_named_service(NODEV64, ANY_SERVICE, - "notify interface", 0); - rw_downgrade(&ni_rwlp.lock); - } - if (notify_interface == NULL) { - cmn_err(CE_WARN, "md: no notify interface"); - rw_exit(&ni_rwlp.lock); - return (0); - } - err = ((*notify_interface)(cmd, tag, set, dev, event)); - rw_exit(&ni_rwlp.lock); - return (err); -} - -char * -obj2devname(uint32_t tag, uint_t setno, md_dev64_t dev) -{ - char *setname; - char name[MD_MAX_CTDLEN]; - minor_t mnum = md_getminor(dev); - major_t maj = md_getmajor(dev); - int rtn = 0; - - /* - * Verify that the passed dev_t refers to a valid metadevice. - * If it doesn't we can make no assumptions as to what the device - * name is. Return NULL in these cases. - */ - if (((maj != md_major) || (MD_MIN2UNIT(mnum) >= md_nunits)) || - (MD_MIN2SET(mnum) >= md_nsets)) { - return (NULL); - } - - setname = NULL; - name[0] = '\0'; - switch (tag) { - case SVM_TAG_HSP: - if (setno == 0) { - rtn = snprintf(name, sizeof (name), "hsp%u", - (unsigned)MD_MIN2UNIT(mnum)); - } else { - setname = mddb_getsetname(setno); - if (setname != NULL) { - rtn = snprintf(name, sizeof (name), "%s/hsp%u", - setname, (unsigned)MD_MIN2UNIT(mnum)); - } - } - break; - case SVM_TAG_DRIVE: - (void) sprintf(name, "drive"); - break; - case SVM_TAG_HOST: - (void) sprintf(name, "host"); - break; - case SVM_TAG_SET: - rtn = snprintf(name, sizeof (name), "%s", - mddb_getsetname(setno)); - if ((name[0] == '\0') || (rtn >= sizeof (name))) { - (void) sprintf(name, "diskset"); - rtn = 0; - } - break; - default: - rtn = snprintf(name, sizeof (name), "%s", md_shortname(mnum)); - break; - } - - /* Check if we got any rubbish for any of the snprintf's */ - if ((name[0] == '\0') || (rtn >= sizeof (name))) { - return (NULL); - } - - return (md_strdup(name)); -} - -/* Sysevent subclass and mdnotify event type pairs */ -struct node { - char *se_ev; - md_event_type_t md_ev; -}; - -/* - * Table must be sorted in case sensitive ascending order of - * the sysevents values - */ -static struct node ev_table[] = { - { ESC_SVM_ADD, EQ_ADD }, - { ESC_SVM_ATTACH, EQ_ATTACH }, - { ESC_SVM_ATTACHING, EQ_ATTACHING }, - { ESC_SVM_CHANGE, EQ_CHANGE }, - { ESC_SVM_CREATE, EQ_CREATE }, - { ESC_SVM_DELETE, EQ_DELETE }, - { ESC_SVM_DETACH, EQ_DETACH }, - { ESC_SVM_DETACHING, EQ_DETACHING }, - { ESC_SVM_DRIVE_ADD, EQ_DRIVE_ADD }, - { ESC_SVM_DRIVE_DELETE, EQ_DRIVE_DELETE }, - { ESC_SVM_ENABLE, EQ_ENABLE }, - { ESC_SVM_ERRED, EQ_ERRED }, - { ESC_SVM_EXCHANGE, EQ_EXCHANGE }, - { ESC_SVM_GROW, EQ_GROW }, - { ESC_SVM_HS_CHANGED, EQ_HS_CHANGED }, - { ESC_SVM_HS_FREED, EQ_HS_FREED }, - { ESC_SVM_HOST_ADD, EQ_HOST_ADD }, - { ESC_SVM_HOST_DELETE, EQ_HOST_DELETE }, - { ESC_SVM_HOTSPARED, EQ_HOTSPARED }, - { ESC_SVM_INIT_FAILED, EQ_INIT_FAILED }, - { ESC_SVM_INIT_FATAL, EQ_INIT_FATAL }, - { ESC_SVM_INIT_START, EQ_INIT_START }, - { ESC_SVM_INIT_SUCCESS, EQ_INIT_SUCCESS }, - { ESC_SVM_IOERR, EQ_IOERR }, - { ESC_SVM_LASTERRED, EQ_LASTERRED }, - { ESC_SVM_MEDIATOR_ADD, EQ_MEDIATOR_ADD }, - { ESC_SVM_MEDIATOR_DELETE, EQ_MEDIATOR_DELETE }, - { ESC_SVM_OFFLINE, EQ_OFFLINE }, - { ESC_SVM_OK, EQ_OK }, - { ESC_SVM_ONLINE, EQ_ONLINE }, - { ESC_SVM_OPEN_FAIL, EQ_OPEN_FAIL }, - { ESC_SVM_REGEN_DONE, EQ_REGEN_DONE }, - { ESC_SVM_REGEN_FAILED, EQ_REGEN_FAILED }, - { ESC_SVM_REGEN_START, EQ_REGEN_START }, - { ESC_SVM_RELEASE, EQ_RELEASE }, - { ESC_SVM_REMOVE, EQ_REMOVE }, - { ESC_SVM_RENAME_DST, EQ_RENAME_DST }, - { ESC_SVM_RENAME_SRC, EQ_RENAME_SRC }, - { ESC_SVM_REPLACE, EQ_REPLACE }, - { ESC_SVM_RESYNC_DONE, EQ_RESYNC_DONE }, - { ESC_SVM_RESYNC_FAILED, EQ_RESYNC_FAILED }, - { ESC_SVM_RESYNC_START, EQ_RESYNC_START }, - { ESC_SVM_RESYNC_SUCCESS, EQ_RESYNC_SUCCESS }, - { ESC_SVM_TAKEOVER, EQ_TAKEOVER } -}; - -static md_tags_t md_tags[] = { - TAG_UNK, - TAG_METADEVICE, - TAG_UNK, - TAG_UNK, - TAG_UNK, - TAG_UNK, - TAG_REPLICA, - TAG_HSP, - TAG_HS, - TAG_SET, - TAG_DRIVE, - TAG_HOST, - TAG_MEDIATOR -}; - -md_event_type_t -ev_get(char *subclass) -{ - int high, mid, low, p; - - low = 0; - high = (sizeof (ev_table) / sizeof (ev_table[0])) - 1; - while (low <= high) { - mid = (high + low) / 2; - p = strcmp(subclass, ev_table[mid].se_ev); - if (p == 0) { - return (ev_table[mid].md_ev); - } else if (p < 0) { - high = mid - 1; - } else { - low = mid + 1; - } - } - - return (EQ_EMPTY); -} - -/* - * Log mdnotify event - */ -void -do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid) -{ - md_event_type_t ev_type; - md_tags_t md_tag; - - /* Translate sysevent into mdnotify event */ - ev_type = ev_get(se_subclass); - - if (tag >= (sizeof (md_tags) / sizeof (md_tags[0]))) { - md_tag = TAG_UNK; - } else { - md_tag = md_tags[tag]; - } - - NOTIFY_MD(md_tag, setno, devid, ev_type); -} - -/* - * Log SVM sys events - */ -void -svm_gen_sysevent( - char *se_class, - char *se_subclass, - uint32_t tag, - set_t setno, - md_dev64_t devid -) -{ - nvlist_t *attr_list; - sysevent_id_t eid; - int err = DDI_SUCCESS; - char *devname; - extern dev_info_t *md_devinfo; - - /* Raise the mdnotify event before anything else */ - do_mdnotify(se_subclass, tag, setno, devid); - - if (md_devinfo == NULL) { - return; - } - - err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_NOSLEEP); - - if (err == DDI_SUCCESS) { - /* Add the version numver */ - err = nvlist_add_uint32(attr_list, SVM_VERSION_NO, - (uint32_t)SVM_VERSION); - if (err != DDI_SUCCESS) { - goto fail; - } - - /* Add the tag attribute */ - err = nvlist_add_uint32(attr_list, SVM_TAG, (uint32_t)tag); - if (err != DDI_SUCCESS) { - goto fail; - } - - /* Add the set number attribute */ - err = nvlist_add_uint32(attr_list, SVM_SET_NO, (uint32_t)setno); - if (err != DDI_SUCCESS) { - goto fail; - } - - /* Add the device id attribute */ - err = nvlist_add_uint64(attr_list, SVM_DEV_ID, (uint64_t)devid); - if (err != DDI_SUCCESS) { - goto fail; - } - - /* Add the device name attribute */ - devname = obj2devname(tag, setno, devid); - if (devname != NULL) { - err = nvlist_add_string(attr_list, SVM_DEV_NAME, - devname); - freestr(devname); - } else { - err = nvlist_add_string(attr_list, SVM_DEV_NAME, - "unspecified"); - } - if (err != DDI_SUCCESS) { - goto fail; - } - - /* Attempt to post event */ - err = ddi_log_sysevent(md_devinfo, DDI_VENDOR_SUNW, se_class, - se_subclass, attr_list, &eid, DDI_SLEEP); - - nvlist_free(attr_list); - if (err != DDI_SUCCESS) { - cmn_err(CE_WARN, "Failed to log event for %s, %s," - " err=%x", se_class, se_subclass, err); - } - } - - return; - -fail: - nvlist_free(attr_list); - cmn_err(CE_WARN, "Failed to setup attributes for event %s, %s, err=%x", - se_class, se_subclass, err); -} - -void -md_clear_named_service() -{ - rw_enter(&ni_rwlp.lock, RW_WRITER); - notify_interface = NULL; - rw_exit(&ni_rwlp.lock); -} - -void -md_create_unit_incore(minor_t mnum, md_ops_t *ops, int alloc_lock) -{ - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - ui = (mdi_unit_t *)kmem_zalloc(sizeof (mdi_unit_t), KM_SLEEP); - ui->ui_opsindex = ops->md_selfindex; - - /* initialize all the incore conditional variables */ - mutex_init(&ui->ui_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&ui->ui_cv, NULL, CV_DEFAULT, NULL); - - if (alloc_lock) { - ui->ui_io_lock = kmem_zalloc(sizeof (md_io_lock_t), KM_SLEEP); - mutex_init(&ui->ui_io_lock->io_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&ui->ui_io_lock->io_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&ui->ui_io_lock->io_list_mutex, NULL, - MUTEX_DEFAULT, NULL); - ui->ui_io_lock->io_list_front = NULL; - ui->ui_io_lock->io_list_back = NULL; - } - if (! (md_get_setstatus(setno) & MD_SET_SNARFING)) { - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - MDI_VOIDUNIT(mnum) = (void *) ui; - rw_exit(&md_unit_array_rw.lock); - } else - MDI_VOIDUNIT(mnum) = (void *) ui; - - rw_enter(&ops->md_link_rw.lock, RW_WRITER); - ui->ui_link.ln_next = ops->md_head; - ui->ui_link.ln_setno = setno; - ui->ui_link.ln_id = mnum; - ops->md_head = &ui->ui_link; - /* setup the unavailable field */ -#if defined(_ILP32) - if (((md_unit_t *)MD_UNIT(mnum))->c.un_revision & MD_64BIT_META_DEV) { - ui->ui_tstate |= MD_64MD_ON_32KERNEL; - cmn_err(CE_NOTE, "d%d is unavailable because 64 bit " - "metadevices are not accessible on a 32 bit kernel", - mnum); - } -#endif - - rw_exit(&ops->md_link_rw.lock); -} - -void -md_destroy_unit_incore(minor_t mnum, md_ops_t *ops) -{ - mdi_unit_t *ui; - - /* - * ASSUMPTION: md_unit_array_rw WRITER lock is held. - */ - ui = MDI_UNIT(mnum); - if (ui == NULL) - return; - - md_rem_link(MD_MIN2SET(mnum), mnum, &ops->md_link_rw.lock, - &ops->md_head); - - /* destroy the io lock if one is being used */ - if (ui->ui_io_lock) { - mutex_destroy(&ui->ui_io_lock->io_mx); - cv_destroy(&ui->ui_io_lock->io_cv); - kmem_free(ui->ui_io_lock, sizeof (md_io_lock_t)); - } - - /* teardown kstat */ - md_kstat_destroy(mnum); - - /* destroy all the incore conditional variables */ - mutex_destroy(&ui->ui_mx); - cv_destroy(&ui->ui_cv); - - kmem_free(ui, sizeof (mdi_unit_t)); - MDI_VOIDUNIT(mnum) = (void *) NULL; -} - -void -md_rem_names(sv_dev_t *sv, int nsv) -{ - int i, s; - int max_sides; - - if (nsv == 0) - return; - - /* All entries removed are in the same diskset */ - if (md_get_setstatus(sv[0].setno) & MD_SET_MNSET) - max_sides = MD_MNMAXSIDES; - else - max_sides = MD_MAXSIDES; - - for (i = 0; i < nsv; i++) - for (s = 0; s < max_sides; s++) - (void) md_remdevname(sv[i].setno, s, sv[i].key); -} - -/* - * Checking user args before we get into physio - returns 0 for ok, else errno - * We do a lot of checking against illegal arguments here because some of the - * real disk drivers don't like certain kinds of arguments. (e.g xy doesn't - * like odd address user buffer.) Those drivers capture bad arguments in - * xxread and xxwrite. But since meta-driver calls their strategy routines - * directly, two bad scenario might happen: - * 1. the real strategy doesn't like it and panic. - * 2. the real strategy doesn't like it and set B_ERROR. - * - * The second case is no better than the first one, since the meta-driver - * will treat it as a media-error and off line the mirror metapartition. - * (Too bad there is no way to tell what error it is.) - * - */ -int -md_chk_uio(struct uio *uio) -{ - int i; - struct iovec *iov; - - /* - * Check for negative or not block-aligned offset - */ - if ((uio->uio_loffset < 0) || - ((uio->uio_loffset & (DEV_BSIZE - 1)) != 0)) { - return (EINVAL); - } - iov = uio->uio_iov; - i = uio->uio_iovcnt; - - while (i--) { - if ((iov->iov_len & (DEV_BSIZE - 1)) != 0) - return (EINVAL); - /* - * Bug # 1212146 - * The default is to not check alignment, but we can now check - * for a larger number of alignments if desired. - */ - if ((uintptr_t)(iov->iov_base) & md_uio_alignment_mask) - return (EINVAL); - iov++; - } - return (0); -} - -char * -md_shortname( - minor_t mnum -) -{ - static char buf[MAXPATHLEN]; - char *devname; - char *invalid = " (Invalid minor number %u) "; - char *metaname; - mdc_unit_t *un; - side_t side; - set_t setno = MD_MIN2SET(mnum); - unit_t unit = MD_MIN2UNIT(mnum); - - if ((un = MD_UNIT(mnum)) == NULL) { - (void) snprintf(buf, sizeof (buf), invalid, mnum); - return (buf); - } - - /* - * If unit is not a friendly name unit, derive the name from the - * minor number. - */ - if ((un->un_revision & MD_FN_META_DEV) == 0) { - /* This is a traditional metadevice */ - if (setno == MD_LOCAL_SET) { - (void) snprintf(buf, sizeof (buf), "d%u", - (unsigned)unit); - } else { - (void) snprintf(buf, sizeof (buf), "%s/d%u", - mddb_getsetname(setno), (unsigned)unit); - } - return (buf); - } - - /* - * It is a friendly name metadevice, so we need to get its name. - */ - side = mddb_getsidenum(setno); - devname = (char *)kmem_alloc(MAXPATHLEN, KM_SLEEP); - if (md_getdevname(setno, side, MD_KEYWILD, - md_makedevice(md_major, mnum), devname, MAXPATHLEN) == 0) { - /* - * md_getdevname has given us either /dev/md/dsk/ - * or /dev/md//dsk/ depending on whether - * or not we are in the local set. Thus, we'll pull the - * metaname from this string. - */ - if ((metaname = strrchr(devname, '/')) == NULL) { - (void) snprintf(buf, sizeof (buf), invalid, mnum); - goto out; - } - metaname++; /* move past slash */ - if (setno == MD_LOCAL_SET) { - /* No set name. */ - (void) snprintf(buf, sizeof (buf), "%s", metaname); - } else { - /* Include setname */ - (void) snprintf(buf, sizeof (buf), "%s/%s", - mddb_getsetname(setno), metaname); - } - } else { - /* We couldn't find the name. */ - (void) snprintf(buf, sizeof (buf), invalid, mnum); - } - -out: - kmem_free(devname, MAXPATHLEN); - return (buf); -} - -char * -md_devname( - set_t setno, - md_dev64_t dev, - char *buf, - size_t size -) -{ - static char mybuf[MD_MAX_CTDLEN]; - int err; - - if (buf == NULL) { - buf = mybuf; - size = sizeof (mybuf); - } else { - ASSERT(size >= MD_MAX_CTDLEN); - } - - err = md_getdevname_common(setno, mddb_getsidenum(setno), - 0, dev, buf, size, MD_NOWAIT_LOCK); - if (err) { - if (err == ENOENT) { - (void) sprintf(buf, "(Unavailable)"); - } else { - (void) sprintf(buf, "(%u.%u)", - md_getmajor(dev), md_getminor(dev)); - } - } - - return (buf); -} -void -md_minphys(buf_t *pb) -{ - extern unsigned md_maxbcount; - - if (pb->b_bcount > md_maxbcount) - pb->b_bcount = md_maxbcount; -} - -void -md_bioinit(struct buf *bp) -{ - ASSERT(bp); - - bioinit(bp); - bp->b_back = bp; - bp->b_forw = bp; - bp->b_flags = B_BUSY; /* initialize flags */ -} - -void -md_bioreset(struct buf *bp) -{ - ASSERT(bp); - - bioreset(bp); - bp->b_back = bp; - bp->b_forw = bp; - bp->b_flags = B_BUSY; /* initialize flags */ -} - -/* - * md_bioclone is needed as long as the real bioclone only takes a daddr_t - * as block number. - * We simply call bioclone with all input parameters but blkno, and set the - * correct blkno afterwards. - * Caveat Emptor: bp_mem must not be NULL! - */ -buf_t * -md_bioclone(buf_t *bp, off_t off, size_t len, dev_t dev, diskaddr_t blkno, - int (*iodone)(buf_t *), buf_t *bp_mem, int sleep) -{ - (void) bioclone(bp, off, len, dev, 0, iodone, bp_mem, sleep); - bp_mem->b_lblkno = blkno; - return (bp_mem); -} - - -/* - * kstat stuff - */ -void -md_kstat_init_ui( - minor_t mnum, - mdi_unit_t *ui -) -{ - if ((ui != NULL) && (ui->ui_kstat == NULL)) { - set_t setno = MD_MIN2SET(mnum); - unit_t unit = MD_MIN2UNIT(mnum); - char module[KSTAT_STRLEN]; - char *p = module; - - if (setno != MD_LOCAL_SET) { - char buf[64]; - char *s = buf; - char *e = module + sizeof (module) - 4; - - (void) sprintf(buf, "%u", setno); - while ((p < e) && (*s != '\0')) - *p++ = *s++; - *p++ = '/'; - } - *p++ = 'm'; - *p++ = 'd'; - *p = '\0'; - if ((ui->ui_kstat = kstat_create(module, unit, NULL, "disk", - KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) { - ui->ui_kstat->ks_lock = &ui->ui_mx; - kstat_install(ui->ui_kstat); - } - } -} - -void -md_kstat_init( - minor_t mnum -) -{ - md_kstat_init_ui(mnum, MDI_UNIT(mnum)); -} - -void -md_kstat_destroy_ui( - mdi_unit_t *ui -) -{ - /* - * kstat_delete() interface has it's own locking mechanism and - * does not allow holding of kstat lock (ks_lock). - * Note: ks_lock == ui_mx from the md_kstat_init_ui(). - */ - if ((ui != NULL) && (ui->ui_kstat != NULL)) { - kstat_delete(ui->ui_kstat); - ui->ui_kstat = NULL; - } -} - -void -md_kstat_destroy( - minor_t mnum -) -{ - md_kstat_destroy_ui(MDI_UNIT(mnum)); -} - -/* - * In the following subsequent routines, locks are held before checking the - * validity of ui_kstat. This is done to make sure that we don't trip over - * a NULL ui_kstat anymore. - */ - -void -md_kstat_waitq_enter( - mdi_unit_t *ui -) -{ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) - kstat_waitq_enter(KSTAT_IO_PTR(ui->ui_kstat)); - mutex_exit(&ui->ui_mx); -} - -void -md_kstat_waitq_to_runq( - mdi_unit_t *ui -) -{ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) - kstat_waitq_to_runq(KSTAT_IO_PTR(ui->ui_kstat)); - mutex_exit(&ui->ui_mx); -} - -void -md_kstat_waitq_exit( - mdi_unit_t *ui -) -{ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) - kstat_waitq_exit(KSTAT_IO_PTR(ui->ui_kstat)); - mutex_exit(&ui->ui_mx); -} - -void -md_kstat_runq_enter( - mdi_unit_t *ui -) -{ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) - kstat_runq_enter(KSTAT_IO_PTR(ui->ui_kstat)); - mutex_exit(&ui->ui_mx); -} - -void -md_kstat_runq_exit( - mdi_unit_t *ui -) -{ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) - kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat)); - mutex_exit(&ui->ui_mx); -} - -void -md_kstat_done( - mdi_unit_t *ui, - buf_t *bp, - int war -) -{ - size_t n_done; - - /* check for end of device */ - if ((bp->b_resid != 0) && (! (bp->b_flags & B_ERROR))) { - n_done = bp->b_bcount; - } else if (bp->b_bcount < bp->b_resid) { - n_done = 0; - } else { - n_done = bp->b_bcount - bp->b_resid; - } - - /* do accounting */ - mutex_enter(&ui->ui_mx); - if (ui->ui_kstat != NULL) { - if ((! war) && (bp->b_flags & B_READ)) { - KSTAT_IO_PTR(ui->ui_kstat)->reads++; - KSTAT_IO_PTR(ui->ui_kstat)->nread += n_done; - } else { - KSTAT_IO_PTR(ui->ui_kstat)->writes++; - KSTAT_IO_PTR(ui->ui_kstat)->nwritten += n_done; - } - kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat)); - } - mutex_exit(&ui->ui_mx); -} - -pid_t -md_getpid() -{ - pid_t valuep; - if (drv_getparm(PPID, (pid_t *)&valuep) != 0) { - ASSERT(0); - return ((pid_t)0); - } else { - ASSERT(valuep); - return (valuep); - } -} - - -proc_t * -md_getproc() -{ - proc_t *valuep; - if (drv_getparm(UPROCP, (proc_t **)&valuep) != 0) { - ASSERT(0); - return ((proc_t *)NULL); - } else { - ASSERT(valuep); - return (valuep); - } -} - -extern kmutex_t pidlock; - -/* - * this check to see if a process pid pair are still running. For the - * disk set lock when both pid/proc are zero then the locks is not - * currently held. - */ -int -md_checkpid(pid_t pid, proc_t *proc) -{ - int retval = 1; - - if (pid == 0 && proc == NULL) - return (0); - - mutex_enter(&pidlock); - if (prfind(pid) != proc) - retval = 0; - mutex_exit(&pidlock); - return (retval); -} - -/* - * NAME: md_init_probereq - * - * DESCRIPTION: initializes a probe request. Parcels out the mnums such that - * they can be dispatched to multiple daemon threads. - * - * PARAMETERS: struct md_probedev *p pointer ioctl input - * - * RETURN VALUE: Returns errno - * - */ - -int -md_init_probereq(struct md_probedev_impl *p, daemon_queue_t **hdrpp) -{ - int err = 0; - int modindx; - intptr_t (*probe_test)(); - - /* - * Initialize the semaphores and mutex - * for the request - */ - - p->probe_sema = kmem_alloc(sizeof (ksema_t), KM_SLEEP); - - p->probe_mx = kmem_alloc(sizeof (kmutex_t), KM_SLEEP); - sema_init(PROBE_SEMA(p), 0, NULL, SEMA_DRIVER, NULL); - mutex_init(PROBE_MX(p), NULL, MUTEX_DEFAULT, NULL); - - modindx = md_getmodindex(&(p->probe.md_driver), 1, 1); - probe_test = md_get_named_service(NODEV64, modindx, - p->probe.test_name, 0); - if (probe_test == NULL) { - err = EINVAL; - goto err_out; - } - - err = md_create_probe_rqlist(p, hdrpp, probe_test); -err_out: - return (err); -} - -/* - * NAME: md_probe_one - * - * DESCRIPTION: Generic routine for probing disks. This is called from the - * daemon. - * - * PARAMETERS: probe_req_t *reqp pointer to the probe request structure. - * - */ - -void -md_probe_one(probe_req_t *reqp) -{ - mdi_unit_t *ui; - md_probedev_impl_t *p; - int err = 0; - set_t setno; - - p = (md_probedev_impl_t *)reqp->private_handle; - /* - * Validate the unit while holding the global ioctl lock, then - * obtain the unit_writerlock. Once the writerlock has been obtained - * we can release the global lock. As long as we hold one of these - * locks this will prevent a metaclear operation being performed - * on the metadevice because metaclear takes the readerlock (via - * openclose lock). - * To avoid a potential deadlock with the probe_fcn() causing i/o to - * be issued to the writerlock'd metadevice we only grab the writerlock - * if the unit is not an SVM root device. - */ - while (md_ioctl_lock_enter() == EINTR) - ; - setno = MD_MIN2SET(reqp->mnum); - ui = MDI_UNIT(reqp->mnum); - if (ui != NULL) { - int writer_grabbed; - dev_t svm_root; - - if ((setno == MD_LOCAL_SET) && root_is_svm) { - svm_root = getrootdev(); - - if (getminor(svm_root) == reqp->mnum) { - writer_grabbed = 0; - } else { - writer_grabbed = 1; - (void) md_unit_writerlock_common(ui, 0); - } - } else { - writer_grabbed = 1; - (void) md_unit_writerlock_common(ui, 0); - } - (void) md_ioctl_lock_exit(0, 0, 0, FALSE); - err = (*reqp->probe_fcn)(ui, reqp->mnum); - if (writer_grabbed) { - md_unit_writerexit(ui); - } - } else { - (void) md_ioctl_lock_exit(0, 0, 0, FALSE); - } - - /* update the info in the probe structure */ - - mutex_enter(PROBE_MX(p)); - if (err != 0) { - cmn_err(CE_NOTE, "md_probe_one: err %d mnum %d\n", err, - reqp->mnum); - (void) mdsyserror(&(p->probe.mde), err); - } - - mutex_exit(PROBE_MX(p)); - sema_v(PROBE_SEMA(p)); - - kmem_free(reqp, sizeof (probe_req_t)); -} -char * -md_strdup(char *cp) -{ - char *new_cp = NULL; - - new_cp = kmem_alloc(strlen(cp) + 1, KM_SLEEP); - - return (strcpy(new_cp, cp)); -} - -void -freestr(char *cp) -{ - kmem_free(cp, strlen(cp) + 1); -} - -/* - * Validate the list and skip invalid devices. Then create - * a doubly linked circular list of devices to probe. - * The hdr points to the head and tail of this list. - */ - -static int -md_create_probe_rqlist(md_probedev_impl_t *plist, daemon_queue_t **hdr, - intptr_t (*probe_test)()) -{ - int i, err, nodevcnt; - probe_req_t *tp; - daemon_queue_t *hp; - minor_t mnum; - - nodevcnt = 0; - - hp = NULL; - - for (i = 0; i < plist->probe.nmdevs; i++) { - mnum = ((minor_t *)(uintptr_t)(plist->probe.mnum_list))[i]; - if (MDI_UNIT(mnum) == NULL) { - cmn_err(CE_WARN, "md: Cannot probe %s since it does " - "not exist", md_shortname(mnum)); - nodevcnt++; - continue; - } - tp = kmem_alloc(sizeof (probe_req_t), KM_SLEEP); - tp->mnum = mnum; - tp->private_handle = (void *)plist; - tp->probe_fcn = probe_test; - if (hp == NULL) { - hp = (daemon_queue_t *)tp; - hp->dq_prev = hp->dq_next = (daemon_queue_t *)tp; - } else { - tp->dq.dq_next = hp; - tp->dq.dq_prev = hp->dq_prev; - hp->dq_prev->dq_next = (daemon_queue_t *)tp; - hp->dq_prev = (daemon_queue_t *)tp; - } - } - - *hdr = hp; - if (nodevcnt > 0) - plist->probe.nmdevs -= nodevcnt; - - /* - * If there are no devices to be probed because they were - * incorrect, then return an error. - */ - err = (plist->probe.nmdevs == 0) ? ENODEV : 0; - - return (err); -} - -/* - * This routine increments the I/O count for set I/O operations. This - * value is used to determine if an I/O can done. If a release is in - * process this will return an error and cause the I/O to be errored. - */ -int -md_inc_iocount(set_t setno) -{ - int rc = 0; - - if (setno == 0) - return (0); - - mutex_enter(&md_set_io[setno].md_io_mx); - if (!(md_set_io[setno].io_state & MD_SET_ACTIVE)) { - rc = EIO; - goto out; - } - - ASSERT(md_set_io[setno].io_cnt >= 0); - md_set_io[setno].io_cnt++; - -out: mutex_exit(&md_set_io[setno].md_io_mx); - return (rc); -} - -void -md_inc_iocount_noblock(set_t setno) -{ - - if (setno == 0) - return; - - mutex_enter(&md_set_io[setno].md_io_mx); - md_set_io[setno].io_cnt++; - mutex_exit(&md_set_io[setno].md_io_mx); -} -void -md_dec_iocount(set_t setno) -{ - - if (setno == 0) - return; - - mutex_enter(&md_set_io[setno].md_io_mx); - md_set_io[setno].io_cnt--; - ASSERT(md_set_io[setno].io_cnt >= 0); - if ((md_set_io[setno].io_state & MD_SET_RELEASE) && - (md_set_io[setno].io_cnt == 0)) - cv_broadcast(&md_set_io[setno].md_io_cv); - mutex_exit(&md_set_io[setno].md_io_mx); -} - -int -md_isblock_setio(set_t setno) -{ - int rc = 0; - - if (setno == 0) - return (0); - - mutex_enter(&md_set_io[setno].md_io_mx); - if (md_set_io[setno].io_state & MD_SET_RELEASE) - rc = 1; - - mutex_exit(&md_set_io[setno].md_io_mx); - return (rc); -} - -int -md_block_setio(set_t setno) -{ - int rc = 0; - - if (setno == 0) - return (1); - - mutex_enter(&md_set_io[setno].md_io_mx); - md_set_io[setno].io_state = MD_SET_RELEASE; - - while (md_set_io[setno].io_cnt > 0) { - cv_wait(&md_set_io[setno].md_io_cv, - &md_set_io[setno].md_io_mx); - } - rc = 1; - - - ASSERT(md_set_io[setno].io_cnt == 0); - mutex_exit(&md_set_io[setno].md_io_mx); - - return (rc); -} - -void -md_clearblock_setio(set_t setno) -{ - if (setno == 0) - return; - - mutex_enter(&md_set_io[setno].md_io_mx); - md_set_io[setno].io_state = MD_SET_ACTIVE; - mutex_exit(&md_set_io[setno].md_io_mx); -} - -void -md_unblock_setio(set_t setno) -{ - if (setno == 0) - return; - - mutex_enter(&md_set_io[setno].md_io_mx); -#ifdef DEBUG - if (md_set_io[setno].io_cnt != 0) { - cmn_err(CE_NOTE, "set %d count was %ld at take", - setno, md_set_io[setno].io_cnt); - } -#endif /* DEBUG */ - - md_set_io[setno].io_state = MD_SET_ACTIVE; - md_set_io[setno].io_cnt = 0; - mutex_exit(&md_set_io[setno].md_io_mx); -} - -/* - * Test and set version of the md_block_setio. - * Set the io_state to keep new I/O from being issued. - * If there is I/O currently in progress, then set io_state to active - * and return failure. Otherwise, return a 1 for success. - * - * Used in a MN diskset since the commd must be suspended before - * this node can attempt to withdraw from a diskset. But, with commd - * suspended, I/O may have been issued that can never finish until - * commd is resumed (allocation of hotspare, etc). So, if I/O is - * outstanding after diskset io_state is marked RELEASE, then set diskset - * io_state back to ACTIVE and return failure. - */ -int -md_tas_block_setio(set_t setno) -{ - int rc; - - if (setno == 0) - return (1); - - mutex_enter(&md_set_io[setno].md_io_mx); - md_set_io[setno].io_state = MD_SET_RELEASE; - - if (md_set_io[setno].io_cnt > 0) { - md_set_io[setno].io_state = MD_SET_ACTIVE; - rc = 0; - } else { - rc = 1; - } - - mutex_exit(&md_set_io[setno].md_io_mx); - - return (rc); -} - -void -md_biodone(struct buf *pb) -{ - minor_t mnum; - set_t setno; - mdi_unit_t *ui; - - mnum = getminor(pb->b_edev); - setno = MD_MIN2SET(mnum); - - if (setno == 0) { - biodone(pb); - return; - } - -#ifdef DEBUG - ui = MDI_UNIT(mnum); - if (!md_unit_isopen(ui)) - cmn_err(CE_NOTE, "io after close on %s\n", md_shortname(mnum)); -#endif /* DEBUG */ - - /* - * Handle the local diskset - */ - if (md_set_io[setno].io_cnt > 0) - md_dec_iocount(setno); - -#ifdef DEBUG - /* - * this is being done after the lock is dropped so there - * are cases it may be invalid. It is advisory. - */ - if (md_set_io[setno].io_state & MD_SET_RELEASE) { - /* Only display this error once for this metadevice */ - if ((ui->ui_tstate & MD_RELEASE_IOERR_DONE) == 0) { - cmn_err(CE_NOTE, - "I/O to %s attempted during set RELEASE\n", - md_shortname(mnum)); - ui->ui_tstate |= MD_RELEASE_IOERR_DONE; - } - } -#endif /* DEBUG */ - - biodone(pb); -} - - -/* - * Driver special private devt handling routine - * INPUT: md_dev64_t - * OUTPUT: dev_t, 32 bit on a 32 bit kernel, 64 bit on a 64 bit kernel. - */ -dev_t -md_dev64_to_dev(md_dev64_t dev) -{ - major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; - minor_t minor = (minor_t)(dev & MAXMIN64); - - return (makedevice(major, minor)); - -} - -/* - * Driver private makedevice routine - * INPUT: major_t major, minor_t minor - * OUTPUT: md_dev64_t, no matter if on 32 bit or 64 bit kernel. - */ -md_dev64_t -md_makedevice(major_t major, minor_t minor) -{ - return (((md_dev64_t)major << NBITSMINOR64) | minor); - -} - - -/* - * Driver private devt md_getmajor routine - * INPUT: dev a 64 bit container holding either a 32 bit or a 64 bit device - * OUTPUT: the appropriate major number - */ -major_t -md_getmajor(md_dev64_t dev) -{ - major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; - - if (major == 0) { - /* Here we were given a 32bit dev */ - major = (major_t)(dev >> NBITSMINOR32) & MAXMAJ32; - } - return (major); -} - -/* - * Driver private devt md_getminor routine - * INPUT: dev a 64 bit container holding either a 32 bit or a 64 bit device - * OUTPUT: the appropriate minor number - */ -minor_t -md_getminor(md_dev64_t dev) -{ - minor_t minor; - major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; - - if (major == 0) { - /* Here we were given a 32bit dev */ - minor = (minor_t)(dev & MAXMIN32); - } else { - minor = (minor_t)(dev & MAXMIN64); - } - return (minor); -} - -int -md_check_ioctl_against_unit(int cmd, mdc_unit_t c) -{ - /* - * If the metadevice is an old style device, it has a vtoc, - * in that case all reading EFI ioctls are not applicable. - * If the metadevice has an EFI label, reading vtoc and geom ioctls - * are not supposed to work. - */ - switch (cmd) { - case DKIOCGGEOM: - case DKIOCGAPART: - /* if > 2 TB then fail */ - if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) { - return (ENOTSUP); - } - break; - case DKIOCGVTOC: - /* if > 2 TB then fail */ - if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) { - return (ENOTSUP); - } - - /* if > 1 TB but < 2TB return overflow */ - if (c.un_revision & MD_64BIT_META_DEV) { - return (EOVERFLOW); - } - break; - case DKIOCGEXTVTOC: - /* if > 2 TB then fail */ - if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) { - return (ENOTSUP); - } - break; - case DKIOCGETEFI: - case DKIOCPARTITION: - if ((c.un_flag & MD_EFILABEL) == 0) { - return (ENOTSUP); - } - break; - - case DKIOCSETEFI: - /* setting an EFI label should always be ok */ - return (0); - - case DKIOCSVTOC: - /* if > 2 TB then fail */ - if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) { - return (ENOTSUP); - } - - /* if > 1 TB but < 2TB return overflow */ - if (c.un_revision & MD_64BIT_META_DEV) { - return (EOVERFLOW); - } - break; - case DKIOCSEXTVTOC: - if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) { - return (ENOTSUP); - } - break; - } - return (0); -} - -/* - * md_vtoc_to_efi_record() - * Input: record id of the vtoc record - * Output: record id of the efi record - * Function: - * - reads the volume name from the vtoc record - * - converts the volume name to a format, libefi understands - * - creates a new record of size MD_EFI_PARTNAME_BYTES - * - stores the volname in that record, - * - commits that record - * - returns the recid of the efi record. - * Caveat Emptor: - * The calling routine must do something like - * - un->c.un_vtoc_id = md_vtoc_to_efi_record(vtoc_recid) - * - commit(un) - * - delete(vtoc_recid) - * in order to keep the mddb consistent in case of a panic in the middle. - * Errors: - * - returns 0 on any error - */ -mddb_recid_t -md_vtoc_to_efi_record(mddb_recid_t vtoc_recid, set_t setno) -{ - struct vtoc *vtoc; - ushort_t *v; - mddb_recid_t efi_recid; - int i; - - if (mddb_getrecstatus(vtoc_recid) != MDDB_OK) { - return (0); - } - vtoc = (struct vtoc *)mddb_getrecaddr(vtoc_recid); - efi_recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0, - MD_CRO_32BIT, setno); - if (efi_recid < 0) { - return (0); - } - v = (ushort_t *)mddb_getrecaddr(efi_recid); - - /* This for loop read, converts and writes */ - for (i = 0; i < LEN_DKL_VVOL; i++) { - v[i] = LE_16((uint16_t)vtoc->v_volume[i]); - } - /* commit the new record */ - mddb_commitrec_wrapper(efi_recid); - - return (efi_recid); -} - -/* - * Send a kernel message. - * user has to provide for an allocated result structure - * If the door handler disappears we retry, emitting warnings every so often. - * - * The recipient argument is almost always unused, and is therefore typically - * set to zero, as zero is an invalid cluster nodeid. The exceptions are the - * marking and clearing of the DRL from a node that is not currently the - * owner. In these cases, the recipient argument will be the nodeid of the - * mirror owner, and MD_MSGF_DIRECTED will be set in the flags. Non-owner - * nodes will not receive these messages. - * - * For the case where md_mn_is_commd_present() is false, we simply pre-set - * the result->kmmr_comm_state to MDMNE_RPC_FAIL. - * This covers the case where the service mdcommd has been killed and so we do - * not get a 'new' result structure copied back. Instead we return with the - * supplied result field, and we need to flag a failure to the caller. - */ -int -mdmn_ksend_message( - set_t setno, - md_mn_msgtype_t type, - uint_t flags, - md_mn_nodeid_t recipient, - char *data, - int size, - md_mn_kresult_t *result) -{ - door_arg_t da; - md_mn_kmsg_t *kmsg; - uint_t send_try_cnt = 0; - uint_t retry_noise_cnt = 0; - int rval; - k_sigset_t oldmask, newmask; - - /* - * Ensure that we default to a recoverable failure state if the - * door upcall cannot pass the request on to rpc.mdcommd. - * This may occur when shutting the node down while there is still - * a mirror resync or metadevice state update occurring. - */ - result->kmmr_comm_state = MDMNE_RPC_FAIL; - result->kmmr_exitval = ~0; - - if (size > MDMN_MAX_KMSG_DATA) - return (ENOMEM); - kmsg = kmem_zalloc(sizeof (md_mn_kmsg_t), KM_SLEEP); - kmsg->kmsg_flags = flags; - kmsg->kmsg_setno = setno; - kmsg->kmsg_recipient = recipient; - kmsg->kmsg_type = type; - kmsg->kmsg_size = size; - bcopy(data, &(kmsg->kmsg_data), size); - - /* - * Wait for the door handle to be established. - */ - while (mdmn_door_did == -1) { - if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) { - cmn_err(CE_WARN, "door handle not yet ready. " - "Check if /usr/lib/lvm/mddoors is running"); - } - delay(md_hz); - } - - /* - * If MD_MSGF_BLK_SIGNAL is set, mask out all signals so that we - * do not fail if the user process receives a signal while we're - * active in the door interface. - */ - if (flags & MD_MSGF_BLK_SIGNAL) { - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - } - - /* - * If message failed with an RPC_FAILURE when rpc.mdcommd had - * been gracefully shutdown (md_mn_is_commd_present returns FALSE) - * then don't retry the message anymore. If message - * failed due to any other reason, then retry up to MD_MN_WARN_INTVL - * times which should allow a shutting down system time to - * notify the kernel of a graceful shutdown of rpc.mdcommd. - * - * Caller of this routine will need to check the md_mn_commd_present - * flag and the failure error in order to determine whether to panic - * or not. If md_mn_commd_present is set to 0 and failure error - * is RPC_FAILURE, the calling routine should not panic since the - * system is in the process of being shutdown. - * - */ - - retry_noise_cnt = send_try_cnt = 0; - while (md_mn_is_commd_present_lite()) { - /* - * data_ptr and data_size are initialized here because on - * return from the upcall, they contain data duplicated from - * rbuf and rsize. This causes subsequent upcalls to fail. - */ - da.data_ptr = (char *)(kmsg); - da.data_size = sizeof (md_mn_kmsg_t); - da.desc_ptr = NULL; - da.desc_num = 0; - da.rbuf = (char *)result; - da.rsize = sizeof (*result); - - while ((rval = door_ki_upcall_limited(mdmn_door_handle, &da, - NULL, SIZE_MAX, 0)) != 0) { - if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) { - if (rval == EAGAIN) { - cmn_err(CE_WARN, - "md: door_upcall failed. " - "Check if mddoors is running."); - } else if (rval == EINTR) { - cmn_err(CE_WARN, - "md: door_upcall failed. " - "Check if rpc.mdcommd is running."); - } else { - cmn_err(CE_WARN, - "md: door_upcall failed. " - "Returned %d", - rval); - } - } - if (++send_try_cnt >= md_send_retry_limit) - break; - - delay(md_hz); - - /* - * data_ptr and data_size are re-initialized here - * because on return from the upcall, they contain - * data duplicated from rbuf and rsize. This causes - * subsequent upcalls to fail. - */ - da.data_ptr = (char *)(kmsg); - da.data_size = sizeof (md_mn_kmsg_t); - da.desc_ptr = NULL; - da.desc_num = 0; - da.rbuf = (char *)result; - da.rsize = sizeof (*result); - } - - - /* - * If: - * - the send succeeded (MDMNE_ACK) - * - we had an MDMNE_RPC_FAIL and commd is now gone - * (note: since the outer loop is commd-dependent, - * checking MDMN_RPC_FAIL here is meaningless) - * - we were told not to retry - * - we exceeded the RPC failure send limit - * punch out of the outer loop prior to the delay() - */ - if (result->kmmr_comm_state == MDMNE_ACK || - (flags & MD_MSGF_KSEND_NORETRY) || - (++send_try_cnt % md_send_retry_limit) == 0 || - !md_mn_is_commd_present()) - break; - delay(md_hz); - } - - if (flags & MD_MSGF_BLK_SIGNAL) { - sigreplace(&oldmask, (k_sigset_t *)NULL); - } - kmem_free(kmsg, sizeof (md_mn_kmsg_t)); - - return (0); -} - -/* - * Called to propagate the capability of a metadevice to all nodes in the set. - * - * On entry, lockp is set if the function has been called from within an ioctl. - * - * IOLOCK_RETURN_RELEASE, which drops the md_ioctl_lock is called in this - * routine to enable other mdioctls to enter the kernel while this - * thread of execution waits on the completion of mdmn_ksend_message. When - * the message is completed the thread continues and md_ioctl_lock must be - * reacquired. Even though md_ioctl_lock is interruptable, we choose to - * ignore EINTR as we must not return without acquiring md_ioctl_lock. - */ - -int -mdmn_send_capability_message(minor_t mnum, volcap_t vc, IOLOCK *lockp) -{ - md_mn_msg_setcap_t msg; - md_mn_kresult_t *kres; - mdi_unit_t *ui = MDI_UNIT(mnum); - int ret; - k_sigset_t oldmask, newmask; - - (void) strncpy((char *)&msg.msg_setcap_driver, - md_ops[ui->ui_opsindex]->md_driver.md_drivername, MD_DRIVERNAMELEN); - msg.msg_setcap_mnum = mnum; - msg.msg_setcap_set = vc.vc_set; - - if (lockp) - IOLOCK_RETURN_RELEASE(0, lockp); - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - - /* - * Mask signals for the mdmd_ksend_message call. This keeps the door - * interface from failing if the user process receives a signal while - * in mdmn_ksend_message. - */ - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - ret = (mdmn_ksend_message(MD_MIN2SET(mnum), MD_MN_MSG_SET_CAP, - MD_MSGF_NO_LOG, 0, (char *)&msg, sizeof (md_mn_msg_setcap_t), - kres)); - sigreplace(&oldmask, (k_sigset_t *)NULL); - - if (!MDMN_KSEND_MSG_OK(ret, kres)) { - mdmn_ksend_show_error(ret, kres, "MD_MN_MSG_SET_CAP"); - ret = EIO; - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - - if (lockp) { - IOLOCK_RETURN_REACQUIRE(lockp); - } - return (ret); -} - -/* - * Called to clear all of the transient capabilities for a metadevice when it is - * not open on any node in the cluster - * Called from close for mirror and sp. - */ - -void -mdmn_clear_all_capabilities(minor_t mnum) -{ - md_isopen_t clumsg; - int ret; - md_mn_kresult_t *kresult; - volcap_t vc; - k_sigset_t oldmask, newmask; - - clumsg.dev = md_makedevice(md_major, mnum); - clumsg.mde = mdnullerror; - /* - * The check open message doesn't have to be logged, nor should the - * result be stored in the MCT. We want an up-to-date state. - */ - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - - /* - * Mask signals for the mdmd_ksend_message call. This keeps the door - * interface from failing if the user process receives a signal while - * in mdmn_ksend_message. - */ - sigfillset(&newmask); - sigreplace(&newmask, &oldmask); - ret = mdmn_ksend_message(MD_MIN2SET(mnum), - MD_MN_MSG_CLU_CHECK, - MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG | MD_MSGF_NO_MCT, 0, - (char *)&clumsg, sizeof (clumsg), kresult); - sigreplace(&oldmask, (k_sigset_t *)NULL); - - if ((ret == 0) && (kresult->kmmr_exitval == 0)) { - /* - * Not open on any node, clear all capabilities, eg ABR and - * DMR - */ - vc.vc_set = 0; - (void) mdmn_send_capability_message(mnum, vc, NULL); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); -} - -/* - * mdmn_ksend_show_error: - * --------------------- - * Called to display the error contents of a failing mdmn_ksend_message() result - * - * Input: - * rv - return value from mdmn_ksend_message() - * kres - pointer to result structure filled in by mdmn_ksend_message - * s - Informative message to identify failing condition (e.g. - * "Ownership change") This string will be displayed with - * cmn_err(CE_WARN, "%s *FAILED*",...) to alert the system - * administrator - */ -void -mdmn_ksend_show_error(int rv, md_mn_kresult_t *kres, const char *s) -{ - if (rv == 0) { - cmn_err(CE_WARN, "%s *FAILED*", s); - cmn_err(CE_CONT, "exit_val = %d, comm_state = %d, failing_node" - " = %d", kres->kmmr_exitval, kres->kmmr_comm_state, - kres->kmmr_failing_node); - } else { - cmn_err(CE_WARN, "%s *FAILED*, return value = %d", s, rv); - } -} - -/* - * Callback routine for resync thread. If requested to suspend we mark the - * commd as not being present. - */ -boolean_t -callb_md_mrs_cpr(void *arg, int code) -{ - callb_cpr_t *cp = (callb_cpr_t *)arg; - int ret = 0; /* assume success */ - clock_t delta; - - mutex_enter(cp->cc_lockp); - - switch (code) { - case CB_CODE_CPR_CHKPT: - /* - * Mark the rpc.mdcommd as no longer present. We are trying to - * suspend the system and so we should expect RPC failures to - * occur. - */ - md_mn_clear_commd_present(); - cp->cc_events |= CALLB_CPR_START; - delta = CPR_KTHREAD_TIMEOUT_SEC * hz; - while (!(cp->cc_events & CALLB_CPR_SAFE)) - /* cv_timedwait() returns -1 if it times out. */ - if ((ret = cv_reltimedwait(&cp->cc_callb_cv, - cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1) - break; - break; - - case CB_CODE_CPR_RESUME: - cp->cc_events &= ~CALLB_CPR_START; - cv_signal(&cp->cc_stop_cv); - break; - } - mutex_exit(cp->cc_lockp); - return (ret != -1); -} - - -void -md_rem_hspname(set_t setno, mdkey_t n_key) -{ - int s; - int max_sides; - - - /* All entries removed are in the same diskset */ - if (md_get_setstatus(setno) & MD_SET_MNSET) - max_sides = MD_MNMAXSIDES; - else - max_sides = MD_MAXSIDES; - - for (s = 0; s < max_sides; s++) - (void) md_remdevname(setno, s, n_key); -} - - -int -md_rem_selfname(minor_t selfid) -{ - int s; - set_t setno = MD_MIN2SET(selfid); - int max_sides; - md_dev64_t dev; - struct nm_next_hdr *nh; - struct nm_name *n; - mdkey_t key; - - /* - * Get the key since remove routine expects it - */ - dev = md_makedevice(md_major, selfid); - if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) { - return (ENOENT); - } - - if ((n = (struct nm_name *)lookup_entry(nh, setno, MD_SIDEWILD, - MD_KEYWILD, dev, 0L)) == NULL) { - return (ENOENT); - } - - /* All entries removed are in the same diskset */ - key = n->n_key; - if (md_get_setstatus(setno) & MD_SET_MNSET) - max_sides = MD_MNMAXSIDES; - else - max_sides = MD_MAXSIDES; - - for (s = 0; s < max_sides; s++) - (void) md_remdevname(setno, s, key); - - return (0); -} - -void -md_upd_set_unnext(set_t setno, unit_t un) -{ - if (un < md_set[setno].s_un_next) { - md_set[setno].s_un_next = un; - } -} - -struct hot_spare_pool * -find_hot_spare_pool(set_t setno, int hsp_id) -{ - hot_spare_pool_t *hsp; - - hsp = (hot_spare_pool_t *)md_set[setno].s_hsp; - while (hsp != NULL) { - if (hsp->hsp_self_id == hsp_id) - return (hsp); - hsp = hsp->hsp_next; - } - - return ((hot_spare_pool_t *)0); -} - -/* - * md_create_taskq: - * - * Create a kernel taskq for the given set/unit combination. This is typically - * used to complete a RR_CLEAN request when the callee is unable to obtain the - * mutex / condvar access required to update the DRL safely. - */ -void * -md_create_taskq(set_t setno, minor_t mnum) -{ - char name[20]; - ddi_taskq_t *tqp; - - (void) snprintf(name, 20, "%d/d%d", setno, MD_MIN2UNIT(mnum)); - - tqp = ddi_taskq_create(md_devinfo, name, 1, TASKQ_DEFAULTPRI, 0); - - return ((void *)tqp); -} diff --git a/usr/src/uts/common/io/lvm/mirror/mirror.c b/usr/src/uts/common/io/lvm/mirror/mirror.c deleted file mode 100644 index e7893ca8c027..000000000000 --- a/usr/src/uts/common/io/lvm/mirror/mirror.c +++ /dev/null @@ -1,5853 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -md_ops_t mirror_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &mirror_md_ops; -#endif - -extern mdq_anchor_t md_done_daemon; -extern mdq_anchor_t md_mstr_daemon; -extern mdq_anchor_t md_mirror_daemon; -extern mdq_anchor_t md_mirror_io_daemon; -extern mdq_anchor_t md_mirror_rs_daemon; -extern mdq_anchor_t md_mhs_daemon; - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern int md_status; -extern clock_t md_hz; - -extern md_krwlock_t md_unit_array_rw; -extern kmutex_t md_mx; -extern kcondvar_t md_cv; -extern int md_mtioctl_cnt; - -daemon_request_t mirror_timeout; -static daemon_request_t hotspare_request; -static daemon_request_t mn_hs_request[MD_MAXSETS]; /* Multinode hs req */ - -int md_mirror_mcs_buf_off; - -/* Flags for mdmn_ksend_message to allow debugging */ -int md_mirror_msg_flags; - -#ifdef DEBUG -/* Flag to switch on debug messages */ -int mirror_debug_flag = 0; -#endif - -/* - * Struct used to hold count of DMR reads and the timestamp of last DMR read - * It is used to verify, using a debugger, that the DMR read ioctl has been - * executed. - */ -dmr_stats_t mirror_dmr_stats = {0, 0}; - -/* - * Mutex protecting list of non-failfast drivers. - */ -static kmutex_t non_ff_drv_mutex; -extern char **non_ff_drivers; - -extern major_t md_major; - -/* - * Write-On-Write memory pool. - */ -static void copy_write_cont(wowhdr_t *wowhdr); -static kmem_cache_t *mirror_wowblk_cache = NULL; -static int md_wowbuf_size = 16384; -static size_t md_wowblk_size; - -/* - * This is a flag that allows: - * - disabling the write-on-write mechanism. - * - logging occurrences of write-on-write - * - switching wow handling procedure processing - * Counter for occurences of WOW. - */ -static uint_t md_mirror_wow_flg = 0; -static int md_mirror_wow_cnt = 0; - -/* - * Tunable to enable/disable dirty region - * processing when closing down a mirror. - */ -static int new_resync = 1; -kmem_cache_t *mirror_parent_cache = NULL; -kmem_cache_t *mirror_child_cache = NULL; - -extern int md_ff_disable; /* disable failfast */ - -static int mirror_map_write(mm_unit_t *, md_mcs_t *, md_mps_t *, int); -static void mirror_read_strategy(buf_t *, int, void *); -static void mirror_write_strategy(buf_t *, int, void *); -static void become_owner(daemon_queue_t *); -static int mirror_done(struct buf *cb); -static int mirror_done_common(struct buf *cb); -static void clear_retry_error(struct buf *cb); - -/* - * patchables - */ -int md_min_rr_size = 200; /* 2000 blocks, or 100k */ -int md_def_num_rr = 1000; /* Default number of dirty regions */ - -/* - * patchable to change delay before rescheduling mirror ownership request. - * Value is clock ticks, default 0.5 seconds - */ -clock_t md_mirror_owner_to = 500000; - -/*ARGSUSED1*/ -static int -mirror_parent_constructor(void *p, void *d1, int d2) -{ - mutex_init(&((md_mps_t *)p)->ps_mx, NULL, MUTEX_DEFAULT, NULL); - return (0); -} - -static void -mirror_parent_init(md_mps_t *ps) -{ - bzero(ps, offsetof(md_mps_t, ps_mx)); - bzero(&ps->ps_overlap_node, sizeof (avl_node_t)); -} - -/*ARGSUSED1*/ -static void -mirror_parent_destructor(void *p, void *d) -{ - mutex_destroy(&((md_mps_t *)p)->ps_mx); -} - -/*ARGSUSED1*/ -static int -mirror_child_constructor(void *p, void *d1, int d2) -{ - bioinit(&((md_mcs_t *)p)->cs_buf); - return (0); -} - -void -mirror_child_init(md_mcs_t *cs) -{ - cs->cs_ps = NULL; - cs->cs_mdunit = 0; - md_bioreset(&cs->cs_buf); -} - -/*ARGSUSED1*/ -static void -mirror_child_destructor(void *p, void *d) -{ - biofini(&((md_mcs_t *)p)->cs_buf); -} - -static void -mirror_wowblk_init(wowhdr_t *p) -{ - bzero(p, md_wowblk_size); -} - -static void -send_poke_hotspares_msg(daemon_request_t *drq) -{ - int rval; - int nretries = 0; - md_mn_msg_pokehsp_t pokehsp; - md_mn_kresult_t *kresult; - set_t setno = (set_t)drq->dq.qlen; - - pokehsp.pokehsp_setno = setno; - - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -retry_sphmsg: - rval = mdmn_ksend_message(setno, MD_MN_MSG_POKE_HOTSPARES, - MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, 0, (char *)&pokehsp, - sizeof (pokehsp), kresult); - - if (!MDMN_KSEND_MSG_OK(rval, kresult)) { - mdmn_ksend_show_error(rval, kresult, "POKE_HOTSPARES"); - /* If we're shutting down already, pause things here. */ - if (kresult->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd has become reachable again, so retry once. - * If this fails we'll panic as the system is in an - * unexpected state. - */ - if (nretries++ == 0) - goto retry_sphmsg; - } - cmn_err(CE_PANIC, - "ksend_message failure: POKE_HOTSPARES"); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - - /* Allow further requests to use this set's queue structure */ - mutex_enter(&drq->dr_mx); - drq->dr_pending = 0; - mutex_exit(&drq->dr_mx); -} - -/* - * Send a poke_hotspares message to the master node. To avoid swamping the - * commd handler with requests we only send a message if there is not one - * already outstanding. We punt the request to a separate thread context as - * cannot afford to block waiting on the request to be serviced. This is - * essential when a reconfig cycle is in progress as any open() of a multinode - * metadevice may result in a livelock. - */ -static void -send_poke_hotspares(set_t setno) -{ - daemon_request_t *drq = &mn_hs_request[setno]; - - mutex_enter(&drq->dr_mx); - if (drq->dr_pending == 0) { - drq->dr_pending = 1; - drq->dq.qlen = (int)setno; - daemon_request(&md_mhs_daemon, - send_poke_hotspares_msg, (daemon_queue_t *)drq, REQ_OLD); - } - mutex_exit(&drq->dr_mx); -} - -void -mirror_set_sm_state( - mm_submirror_t *sm, - mm_submirror_ic_t *smic, - sm_state_t newstate, - int force) -{ - int compcnt; - int i; - int errcnt; - sm_state_t origstate; - md_m_shared_t *shared; - - if (force) { - sm->sm_state = newstate; - uniqtime32(&sm->sm_timestamp); - return; - } - - origstate = newstate; - - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm); - for (i = 0, errcnt = 0; i < compcnt; i++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, i); - if (shared->ms_state & (CS_ERRED | CS_LAST_ERRED)) - newstate |= SMS_COMP_ERRED; - if (shared->ms_state & (CS_RESYNC)) - newstate |= SMS_COMP_RESYNC; - if (shared->ms_state & CS_ERRED) - errcnt++; - } - - if ((newstate & (SMS_COMP_ERRED | SMS_COMP_RESYNC)) != 0) - newstate &= ~origstate; - - if (errcnt == compcnt) - newstate |= SMS_ALL_ERRED; - else - newstate &= ~SMS_ALL_ERRED; - - sm->sm_state = newstate; - uniqtime32(&sm->sm_timestamp); -} - -static int -mirror_geterror(mm_unit_t *un, int *smi, int *cip, int clr_error, - int frm_probe) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int ci; - int i; - int compcnt; - int open_comp; /* flag for open component */ - - for (i = *smi; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - if (!SMS_IS(sm, SMS_INUSE)) - continue; - - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, un); - for (ci = *cip; ci < compcnt; ci++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - /* - * if called from any routine but probe, we check for - * MDM_S_ISOPEN flag. Since probe does a pseduo open, - * it sets MDM_S_PROBEOPEN flag and we test for this - * flag. They are both exclusive tests. - */ - open_comp = (frm_probe) ? - (shared->ms_flags & MDM_S_PROBEOPEN): - (shared->ms_flags & MDM_S_ISOPEN); - if (((shared->ms_flags & MDM_S_IOERR || !open_comp) && - ((shared->ms_state == CS_OKAY) || - (shared->ms_state == CS_RESYNC))) || - (!open_comp && - (shared->ms_state == CS_LAST_ERRED))) { - if (clr_error) { - shared->ms_flags &= ~MDM_S_IOERR; - } - *cip = ci; - *smi = i; - return (1); - } - - if (clr_error && (shared->ms_flags & MDM_S_IOERR)) { - shared->ms_flags &= ~MDM_S_IOERR; - } - } - - *cip = 0; - } - return (0); -} - -/*ARGSUSED*/ -static void -mirror_run_queue(void *d) -{ - if (!(md_status & MD_GBL_DAEMONS_LIVE)) - md_daemon(1, &md_done_daemon); -} -/* - * check_comp_4_hotspares - * - * This function attempts to allocate a hotspare for this component if the - * component is in error. In a MN set, the function can be called in 2 modes. - * It can be called either when a component error has been detected or when a - * new hotspare has been allocated. In this case, MD_HOTSPARE_XMIT is set - * in flags and the request is sent to all nodes. - * The handler on each of the nodes then calls this function with - * MD_HOTSPARE_XMIT unset and the hotspare allocation is then performed. - * - * For non-MN sets the function simply attempts to allocate a hotspare. - * - * On entry, the following locks are held - * mirror_md_ops.md_link_rw (if flags has MD_HOTSPARE_LINKHELD set) - * md_unit_writerlock - * - * Returns 0 if ok - * 1 if the unit containing the component has been cleared while - * the mdmn_ksend_message() was being executed - */ -extern int -check_comp_4_hotspares( - mm_unit_t *un, - int smi, - int ci, - uint_t flags, - mddb_recid_t hs_id, /* Only used by MN disksets */ - IOLOCK *lockp /* can be NULL */ -) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - mddb_recid_t recids[6]; - minor_t mnum; - intptr_t (*hs_dev)(); - void (*hs_done)(); - void *hs_data; - md_error_t mde = mdnullerror; - set_t setno; - md_mn_msg_allochsp_t allochspmsg; - md_mn_kresult_t *kresult; - mm_unit_t *new_un; - int rval; - int nretries = 0; - - mnum = MD_SID(un); - setno = MD_UN2SET(un); - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - - if (shared->ms_state != CS_ERRED) - return (0); - - /* Don't start a new component resync if a resync is already running. */ - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) - return (0); - - if (MD_MNSET_SETNO(setno) && (flags & MD_HOTSPARE_XMIT)) { - uint_t msgflags; - md_mn_msgtype_t msgtype; - - /* Send allocate hotspare message to all nodes */ - - allochspmsg.msg_allochsp_mnum = un->c.un_self_id; - allochspmsg.msg_allochsp_sm = smi; - allochspmsg.msg_allochsp_comp = ci; - allochspmsg.msg_allochsp_hs_id = shared->ms_hs_id; - - /* - * Before calling mdmn_ksend_message(), release locks - * Can never be in the context of an ioctl. - */ - md_unit_writerexit(MDI_UNIT(mnum)); - if (flags & MD_HOTSPARE_LINKHELD) - rw_exit(&mirror_md_ops.md_link_rw.lock); -#ifdef DEBUG - if (mirror_debug_flag) - printf("send alloc hotspare, flags=" - "0x%x %x, %x, %x, %x\n", flags, - allochspmsg.msg_allochsp_mnum, - allochspmsg.msg_allochsp_sm, - allochspmsg.msg_allochsp_comp, - allochspmsg.msg_allochsp_hs_id); -#endif - if (flags & MD_HOTSPARE_WMUPDATE) { - msgtype = MD_MN_MSG_ALLOCATE_HOTSPARE2; - /* - * When coming from an update of watermarks, there - * must already be a message logged that triggered - * this action. So, no need to log this message, too. - */ - msgflags = MD_MSGF_NO_LOG; - } else { - msgtype = MD_MN_MSG_ALLOCATE_HOTSPARE; - msgflags = MD_MSGF_DEFAULT_FLAGS; - } - - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -cc4hs_msg: - rval = mdmn_ksend_message(setno, msgtype, msgflags, 0, - (char *)&allochspmsg, sizeof (allochspmsg), - kresult); - - if (!MDMN_KSEND_MSG_OK(rval, kresult)) { -#ifdef DEBUG - if (mirror_debug_flag) - mdmn_ksend_show_error(rval, kresult, - "ALLOCATE HOTSPARE"); -#endif - /* - * If message is sent ok but exitval indicates an error - * it must be because the mirror has been cleared. In - * this case re-obtain lock and return an error - */ - if ((rval == 0) && (kresult->kmmr_exitval != 0)) { - if (flags & MD_HOTSPARE_LINKHELD) { - rw_enter(&mirror_md_ops.md_link_rw.lock, - RW_READER); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - return (1); - } - /* If we're shutting down already, pause things here. */ - if (kresult->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd has become reachable again, so retry - * once. If this fails we'll panic as the - * system is in an unexpected state. - */ - if (nretries++ == 0) - goto cc4hs_msg; - } - cmn_err(CE_PANIC, - "ksend_message failure: ALLOCATE_HOTSPARE"); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - - /* - * re-obtain the locks - */ - if (flags & MD_HOTSPARE_LINKHELD) - rw_enter(&mirror_md_ops.md_link_rw.lock, RW_READER); - new_un = md_unit_writerlock(MDI_UNIT(mnum)); - - /* - * As we had to release the locks in order to send the - * message to all nodes, we need to check to see if the - * unit has changed. If it has we release the writerlock - * and return fail. - */ - if ((new_un != un) || (un->c.un_type != MD_METAMIRROR)) { - md_unit_writerexit(MDI_UNIT(mnum)); - return (1); - } - } else { - if (MD_MNSET_SETNO(setno)) { - /* - * If 2 or more nodes simultaneously see a - * component failure, these nodes will each - * send an ALLOCATE_HOTSPARE[2] message. - * The first message will allocate the hotspare - * and the subsequent messages should do nothing. - * - * If a slave node doesn't have a hotspare allocated - * at the time the message is initiated, then the - * passed in hs_id will be 0. If the node - * executing this routine has a component shared - * ms_hs_id of non-zero, but the message shows a - * hs_id of 0, then just return since a hotspare - * has already been allocated for this failing - * component. When the slave node returns from - * the ksend_message the hotspare will have - * already been allocated. - * - * If the slave node does send an hs_id of non-zero, - * and the slave node's hs_id matches this node's - * ms_hs_id, then the hotspare has error'd and - * should be replaced. - * - * If the slave node sends an hs_id of non-zero and - * this node has a different shared ms_hs_id, then - * just return since this hotspare has already - * been hotspared. - */ - if (shared->ms_hs_id != 0) { - if (hs_id == 0) { -#ifdef DEBUG - if (mirror_debug_flag) { - printf("check_comp_4_hotspares" - "(NOXMIT), short circuit " - "hs_id=0x%x, " - "ms_hs_id=0x%x\n", - hs_id, shared->ms_hs_id); - } -#endif - return (0); - } - if (hs_id != shared->ms_hs_id) { -#ifdef DEBUG - if (mirror_debug_flag) { - printf("check_comp_4_hotspares" - "(NOXMIT), short circuit2 " - "hs_id=0x%x, " - "ms_hs_id=0x%x\n", - hs_id, shared->ms_hs_id); - } -#endif - return (0); - } - } - } - - sm = &un->un_sm[smi]; - hs_dev = md_get_named_service(sm->sm_dev, 0, - "hotspare device", 0); - if ((*hs_dev)(sm->sm_dev, 0, ci, recids, 6, &hs_done, - &hs_data) != 0) - return (0); - - /* - * set_sm_comp_state() commits the modified records. - * As we don't transmit the changes, no need to drop the lock. - */ - set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, - MD_STATE_NO_XMIT, (IOLOCK *)NULL); - - (*hs_done)(sm->sm_dev, hs_data); - - mirror_check_failfast(mnum); - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HOTSPARED, SVM_TAG_METADEVICE, - setno, MD_SID(un)); - - /* - * For a multi-node set we need to reset the un_rs_type, - * un_rs_resync_done and un_rs_resync_2_do fields as the - * hot-spare resync must copy all applicable data. - */ - if (MD_MNSET_SETNO(setno)) { - un->un_rs_type = MD_RS_NONE; - un->un_rs_resync_done = 0; - un->un_rs_resync_2_do = 0; - } - - /* - * Must drop writer lock since mirror_resync_unit will - * open devices and must be able to grab readerlock. - * Don't need to drop IOLOCK since any descendent routines - * calling ksend_messages will drop the IOLOCK as needed. - * - */ - if (lockp) { - md_ioctl_writerexit(lockp); - } else { - md_unit_writerexit(MDI_UNIT(mnum)); - } - - /* start resync */ - (void) mirror_resync_unit(mnum, NULL, &mde, lockp); - - if (lockp) { - new_un = md_ioctl_writerlock(lockp, MDI_UNIT(mnum)); - } else { - new_un = md_unit_writerlock(MDI_UNIT(mnum)); - } - } - return (0); -} - -/* - * check_unit_4_hotspares - * - * For a given mirror, allocate hotspares, if available for any components - * that are in error - * - * Returns 0 if ok - * 1 if check_comp_4_hotspares returns non-zero. This will only - * happen for a MN unit where the unit has been cleared while - * the allocate hotspare message is sent to all nodes. - */ -static int -check_unit_4_hotspares(mm_unit_t *un, int flags) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int ci; - int i; - int compcnt; - - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) - return (0); - - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - if (!SMS_IS(sm, SMS_INUSE)) - continue; - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm); - for (ci = 0; ci < compcnt; ci++) { - md_m_shared_t *shared; - - shared = (md_m_shared_t *) - (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, ci); - /* - * Never called from ioctl context, so pass in - * (IOLOCK *)NULL. Pass through flags from calling - * routine, also setting XMIT flag. - */ - if (check_comp_4_hotspares(un, i, ci, - (MD_HOTSPARE_XMIT | flags), - shared->ms_hs_id, (IOLOCK *)NULL) != 0) - return (1); - } - } - return (0); -} - -static void -check_4_hotspares(daemon_request_t *drq) -{ - mdi_unit_t *ui; - mm_unit_t *un; - md_link_t *next; - int x; - - mutex_enter(&drq->dr_mx); /* clear up front so can poke */ - drq->dr_pending = 0; /* again in low level routine if */ - mutex_exit(&drq->dr_mx); /* something found to do */ - - /* - * Used to have a problem here. The disksets weren't marked as being - * MNHOLD. This opened a window where we could be searching for - * hotspares and have the disk set unloaded (released) from under - * us causing a panic in stripe_component_count(). - * The way to prevent that is to mark the set MNHOLD which prevents - * any diskset from being released while we are scanning the mirrors, - * submirrors and components. - */ - - for (x = 0; x < md_nsets; x++) - md_holdset_enter(x); - - rw_enter(&mirror_md_ops.md_link_rw.lock, RW_READER); - for (next = mirror_md_ops.md_head; next != NULL; next = next->ln_next) { - ui = MDI_UNIT(next->ln_id); - - un = (mm_unit_t *)md_unit_readerlock(ui); - - /* - * Only check the unit if we are the master for this set - * For an MN set, poke_hotspares() is only effective on the - * master - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && - md_set[MD_UN2SET(un)].s_am_i_master == 0) { - md_unit_readerexit(ui); - continue; - } - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { - md_unit_readerexit(ui); - continue; - } - md_unit_readerexit(ui); - - un = (mm_unit_t *)md_unit_writerlock(ui); - /* - * check_unit_4_hotspares will exit 1 if the unit has been - * removed during the process of allocating the hotspare. - * This can only happen for a MN metadevice. If unit no longer - * exists, no need to release writerlock - */ - if (check_unit_4_hotspares(un, MD_HOTSPARE_LINKHELD) == 0) - md_unit_writerexit(ui); - else { - /* - * If check_unit_4_hotspares failed, queue another - * request and break out of this one - */ - (void) poke_hotspares(); - break; - } - } - rw_exit(&mirror_md_ops.md_link_rw.lock); - - for (x = 0; x < md_nsets; x++) - md_holdset_exit(x); -} - -/* - * poke_hotspares - * - * If there is not a pending poke_hotspares request pending, queue a requent - * to call check_4_hotspares(). This will scan all mirrors and attempt to - * allocate hotspares for all components in error. - */ -int -poke_hotspares() -{ - mutex_enter(&hotspare_request.dr_mx); - if (hotspare_request.dr_pending == 0) { - hotspare_request.dr_pending = 1; - daemon_request(&md_mhs_daemon, - check_4_hotspares, (daemon_queue_t *)&hotspare_request, - REQ_OLD); - } - mutex_exit(&hotspare_request.dr_mx); - return (0); -} - -static void -free_all_ecomps(err_comp_t *ecomp) -{ - err_comp_t *d; - - while (ecomp != NULL) { - d = ecomp; - ecomp = ecomp->ec_next; - kmem_free(d, sizeof (err_comp_t)); - } -} - -/* - * NAME: mirror_openfail_console_info - * - * DESCRIPTION: Prints a informative message to the console when mirror - * cannot be opened. - * - * PARAMETERS: mm_unit_t un - pointer to mirror unit structure - * int smi - submirror index - * int ci - component index - */ - -void -mirror_openfail_console_info(mm_unit_t *un, int smi, int ci) -{ - void (*get_dev)(); - ms_cd_info_t cd; - md_dev64_t tmpdev; - - tmpdev = un->un_sm[smi].sm_dev; - get_dev = (void (*)())md_get_named_service(tmpdev, 0, "get device", 0); - if (get_dev != NULL) { - (void) (*get_dev)(tmpdev, smi, ci, &cd); - cmn_err(CE_WARN, "md %s: open error on %s", - md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), - cd.cd_dev, NULL, 0)); - } else { - cmn_err(CE_WARN, "md %s: open error", - md_shortname(MD_SID(un))); - } -} - -static int -mirror_close_all_devs(mm_unit_t *un, int md_cflags) -{ - int i; - md_dev64_t dev; - - for (i = 0; i < NMIRROR; i++) { - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - dev = un->un_sm[i].sm_dev; - md_layered_close(dev, md_cflags); - } - return (0); -} - -/* - * Keep track of drivers that don't support failfast. We use this so that - * we only log one diagnostic message for each of these drivers, no matter - * how many times we run the mirror_check_failfast function. - * Return 1 if this is a new driver that does not support failfast, - * return 0 if we have already seen this non-failfast driver. - */ -static int -new_non_ff_driver(const char *s) -{ - mutex_enter(&non_ff_drv_mutex); - if (non_ff_drivers == NULL) { - non_ff_drivers = (char **)kmem_alloc(2 * sizeof (char *), - KM_NOSLEEP); - if (non_ff_drivers == NULL) { - mutex_exit(&non_ff_drv_mutex); - return (1); - } - - non_ff_drivers[0] = (char *)kmem_alloc(strlen(s) + 1, - KM_NOSLEEP); - if (non_ff_drivers[0] == NULL) { - kmem_free(non_ff_drivers, 2 * sizeof (char *)); - non_ff_drivers = NULL; - mutex_exit(&non_ff_drv_mutex); - return (1); - } - - (void) strcpy(non_ff_drivers[0], s); - non_ff_drivers[1] = NULL; - - } else { - int i; - char **tnames; - char **tmp; - - for (i = 0; non_ff_drivers[i] != NULL; i++) { - if (strcmp(s, non_ff_drivers[i]) == 0) { - mutex_exit(&non_ff_drv_mutex); - return (0); - } - } - - /* allow for new element and null */ - i += 2; - tnames = (char **)kmem_alloc(i * sizeof (char *), KM_NOSLEEP); - if (tnames == NULL) { - mutex_exit(&non_ff_drv_mutex); - return (1); - } - - for (i = 0; non_ff_drivers[i] != NULL; i++) - tnames[i] = non_ff_drivers[i]; - - tnames[i] = (char *)kmem_alloc(strlen(s) + 1, KM_NOSLEEP); - if (tnames[i] == NULL) { - /* adjust i so that it is the right count to free */ - kmem_free(tnames, (i + 2) * sizeof (char *)); - mutex_exit(&non_ff_drv_mutex); - return (1); - } - - (void) strcpy(tnames[i++], s); - tnames[i] = NULL; - - tmp = non_ff_drivers; - non_ff_drivers = tnames; - /* i now represents the count we previously alloced */ - kmem_free(tmp, i * sizeof (char *)); - } - mutex_exit(&non_ff_drv_mutex); - - return (1); -} - -/* - * Check for the "ddi-failfast-supported" devtree property on each submirror - * component to indicate if we should do I/O to that submirror with the - * B_FAILFAST flag set or not. This check is made at various state transitions - * in the mirror code (e.g. open, enable, hotspare, etc.). Sometimes we - * only need to check one drive (e.g. hotspare) but since the check is - * fast and infrequent and sometimes needs to be done on all components we - * just check all components on each call. - */ -void -mirror_check_failfast(minor_t mnum) -{ - int i; - mm_unit_t *un; - - if (md_ff_disable) - return; - - un = MD_UNIT(mnum); - - for (i = 0; i < NMIRROR; i++) { - int ci; - int cnt; - int ff = 1; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - void (*get_dev)(); - - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - - cnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm); - for (ci = 0; ci < cnt; ci++) { - int found = 0; - dev_t ci_dev; - major_t major; - dev_info_t *devi; - ms_cd_info_t cd; - - /* - * this already returns the hs - * dev if the device is spared - */ - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - - ci_dev = md_dev64_to_dev(cd.cd_dev); - major = getmajor(ci_dev); - - if (major == md_major) { - /* - * this component must be a soft - * partition; get the real dev - */ - minor_t dev_mnum; - mdi_unit_t *ui; - mp_unit_t *un; - set_t setno; - side_t side; - md_dev64_t tmpdev; - - ui = MDI_UNIT(getminor(ci_dev)); - - /* grab necessary lock */ - un = (mp_unit_t *)md_unit_readerlock(ui); - - dev_mnum = MD_SID(un); - setno = MD_MIN2SET(dev_mnum); - side = mddb_getsidenum(setno); - - tmpdev = un->un_dev; - - /* Get dev by device id */ - if (md_devid_found(setno, side, - un->un_key) == 1) { - tmpdev = md_resolve_bydevid(dev_mnum, - tmpdev, un->un_key); - } - - md_unit_readerexit(ui); - - ci_dev = md_dev64_to_dev(tmpdev); - major = getmajor(ci_dev); - } - - if (ci_dev != NODEV32 && - (devi = e_ddi_hold_devi_by_dev(ci_dev, 0)) - != NULL) { - ddi_prop_op_t prop_op = PROP_LEN_AND_VAL_BUF; - int propvalue = 0; - int proplength = sizeof (int); - int error; - struct cb_ops *cb; - - if ((cb = devopsp[major]->devo_cb_ops) != - NULL) { - error = (*cb->cb_prop_op) - (DDI_DEV_T_ANY, devi, prop_op, - DDI_PROP_NOTPROM|DDI_PROP_DONTPASS, - "ddi-failfast-supported", - (caddr_t)&propvalue, &proplength); - - if (error == DDI_PROP_SUCCESS) - found = 1; - } - - if (!found && new_non_ff_driver( - ddi_driver_name(devi))) { - cmn_err(CE_NOTE, "!md: B_FAILFAST I/O" - "disabled on %s", - ddi_driver_name(devi)); - } - - ddi_release_devi(devi); - } - - /* - * All components must support - * failfast in the submirror. - */ - if (!found) { - ff = 0; - break; - } - } - - if (ff) { - sm->sm_flags |= MD_SM_FAILFAST; - } else { - sm->sm_flags &= ~MD_SM_FAILFAST; - } - } -} - -/* - * Return true if the submirror is unavailable. - * If any of the submirror components are opened then the submirror cannot - * be unavailable (MD_INACCESSIBLE). - * If any of the components are already in the errored state, then the submirror - * cannot be unavailable (MD_INACCESSIBLE). - */ -static bool_t -submirror_unavailable(mm_unit_t *un, int smi, int from_probe) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int ci; - int compcnt; - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, un); - for (ci = 0; ci < compcnt; ci++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - if (from_probe) { - if (shared->ms_flags & MDM_S_PROBEOPEN) - return (B_FALSE); - } else { - if (shared->ms_flags & MDM_S_ISOPEN) - return (B_FALSE); - } - if (shared->ms_state == CS_ERRED || - shared->ms_state == CS_LAST_ERRED) - return (B_FALSE); - } - - return (B_TRUE); -} - -static int -mirror_open_all_devs(minor_t mnum, int md_oflags, IOLOCK *lockp) -{ - int i; - mm_unit_t *un; - mdi_unit_t *ui; - int err; - int smi; - int ci; - err_comp_t *c; - err_comp_t *ecomps = NULL; - int smmask = 0; - set_t setno; - int sm_cnt; - int sm_unavail_cnt; - - mirror_check_failfast(mnum); - - un = MD_UNIT(mnum); - ui = MDI_UNIT(mnum); - setno = MD_UN2SET(un); - - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev = un->un_sm[i].sm_dev; - - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - if (md_layered_open(mnum, &tmpdev, md_oflags)) - smmask |= SMI2BIT(i); - un->un_sm[i].sm_dev = tmpdev; - } - - /* - * If smmask is clear, all submirrors are accessible. Clear the - * MD_INACCESSIBLE bit in this case. This bit is also cleared for the - * mirror device. If smmask is set, we have to determine which of the - * submirrors are in error. If no submirror is accessible we mark the - * whole mirror as MD_INACCESSIBLE. - */ - if (smmask == 0) { - if (lockp) { - md_ioctl_readerexit(lockp); - (void) md_ioctl_writerlock(lockp, ui); - } else { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - } - ui->ui_tstate &= ~MD_INACCESSIBLE; - if (lockp) { - md_ioctl_writerexit(lockp); - (void) md_ioctl_readerlock(lockp, ui); - } else { - md_unit_writerexit(ui); - (void) md_unit_readerlock(ui); - } - - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev; - mdi_unit_t *sm_ui; - - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - - tmpdev = un->un_sm[i].sm_dev; - sm_ui = MDI_UNIT(getminor(md_dev64_to_dev(tmpdev))); - (void) md_unit_writerlock(sm_ui); - sm_ui->ui_tstate &= ~MD_INACCESSIBLE; - md_unit_writerexit(sm_ui); - } - - return (0); - } - - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev; - - if (!(smmask & SMI2BIT(i))) - continue; - - tmpdev = un->un_sm[i].sm_dev; - err = md_layered_open(mnum, &tmpdev, MD_OFLG_CONT_ERRS); - un->un_sm[i].sm_dev = tmpdev; - ASSERT(err == 0); - } - - if (lockp) { - md_ioctl_readerexit(lockp); - un = (mm_unit_t *)md_ioctl_writerlock(lockp, ui); - } else { - md_unit_readerexit(ui); - un = (mm_unit_t *)md_unit_writerlock(ui); - } - - /* - * We want to make sure the unavailable flag is not masking a real - * error on the submirror. - * For each submirror, - * if all of the submirror components couldn't be opened and there - * are no errors on the submirror, then set the unavailable flag - * otherwise, clear unavailable. - */ - sm_cnt = 0; - sm_unavail_cnt = 0; - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev; - mdi_unit_t *sm_ui; - - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - - sm_cnt++; - tmpdev = un->un_sm[i].sm_dev; - sm_ui = MDI_UNIT(getminor(md_dev64_to_dev(tmpdev))); - - (void) md_unit_writerlock(sm_ui); - if (submirror_unavailable(un, i, 0)) { - sm_ui->ui_tstate |= MD_INACCESSIBLE; - sm_unavail_cnt++; - } else { - sm_ui->ui_tstate &= ~MD_INACCESSIBLE; - } - md_unit_writerexit(sm_ui); - } - - /* - * If all of the submirrors are unavailable, the mirror is also - * unavailable. - */ - if (sm_cnt == sm_unavail_cnt) { - ui->ui_tstate |= MD_INACCESSIBLE; - } else { - ui->ui_tstate &= ~MD_INACCESSIBLE; - } - - smi = 0; - ci = 0; - while (mirror_geterror(un, &smi, &ci, 1, 0) != 0) { - if (mirror_other_sources(un, smi, ci, 1) == 1) { - - free_all_ecomps(ecomps); - (void) mirror_close_all_devs(un, md_oflags); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - mirror_openfail_console_info(un, smi, ci); - if (lockp) { - md_ioctl_writerexit(lockp); - (void) md_ioctl_readerlock(lockp, ui); - } else { - md_unit_writerexit(ui); - (void) md_unit_readerlock(ui); - } - return (ENXIO); - } - - /* track all component states that need changing */ - c = (err_comp_t *)kmem_alloc(sizeof (err_comp_t), KM_SLEEP); - c->ec_next = ecomps; - c->ec_smi = smi; - c->ec_ci = ci; - ecomps = c; - ci++; - } - - /* Make all state changes and commit them */ - for (c = ecomps; c != NULL; c = c->ec_next) { - /* - * If lockp is set, then entering kernel through ioctl. - * For a MN set, the only ioctl path is via a commd message - * (ALLOCATE_HOTSPARE or *RESYNC* messages) that is already - * being sent to each node. - * In this case, set NO_XMIT so that set_sm_comp_state - * won't attempt to send a message on a message. - * - * In !MN sets, the xmit flag is ignored, so it doesn't matter - * which flag is passed. - */ - if (lockp) { - set_sm_comp_state(un, c->ec_smi, c->ec_ci, CS_ERRED, 0, - MD_STATE_NO_XMIT, lockp); - } else { - set_sm_comp_state(un, c->ec_smi, c->ec_ci, CS_ERRED, 0, - (MD_STATE_XMIT | MD_STATE_OCHELD), lockp); - } - /* - * For a MN set, the NOTIFY is done when the state change is - * processed on each node - */ - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - } - - if (lockp) { - md_ioctl_writerexit(lockp); - (void) md_ioctl_readerlock(lockp, ui); - } else { - md_unit_writerexit(ui); - (void) md_unit_readerlock(ui); - } - - free_all_ecomps(ecomps); - - /* allocate hotspares for all errored components */ - if (MD_MNSET_SETNO(setno)) { - /* - * If we're called from an ioctl (lockp set) then we cannot - * directly call send_poke_hotspares as this will block until - * the message gets despatched to all nodes. If the cluster is - * going through a reconfig cycle then the message will block - * until the cycle is complete, and as we originate from a - * service call from commd we will livelock. - */ - if (lockp == NULL) { - md_unit_readerexit(ui); - send_poke_hotspares(setno); - (void) md_unit_readerlock(ui); - } - } else { - (void) poke_hotspares(); - } - return (0); -} - -void -mirror_overlap_tree_remove(md_mps_t *ps) -{ - mm_unit_t *un; - - if (panicstr) - return; - - VERIFY(ps->ps_flags & MD_MPS_ON_OVERLAP); - un = ps->ps_un; - - mutex_enter(&un->un_overlap_tree_mx); - avl_remove(&un->un_overlap_root, ps); - ps->ps_flags &= ~MD_MPS_ON_OVERLAP; - if (un->un_overlap_tree_flag != 0) { - un->un_overlap_tree_flag = 0; - cv_broadcast(&un->un_overlap_tree_cv); - } - mutex_exit(&un->un_overlap_tree_mx); -} - - -/* - * wait_for_overlaps: - * ----------------- - * Check that given i/o request does not cause an overlap with already pending - * i/o. If it does, block until the overlapped i/o completes. - * - * The flag argument has MD_OVERLAP_ALLOW_REPEAT set if it is ok for the parent - * structure to be already in the overlap tree and MD_OVERLAP_NO_REPEAT if - * it must not already be in the tree. - */ -static void -wait_for_overlaps(md_mps_t *ps, int flags) -{ - mm_unit_t *un; - avl_index_t where; - md_mps_t *ps1; - - if (panicstr) - return; - - un = ps->ps_un; - mutex_enter(&un->un_overlap_tree_mx); - if ((flags & MD_OVERLAP_ALLOW_REPEAT) && - (ps->ps_flags & MD_MPS_ON_OVERLAP)) { - mutex_exit(&un->un_overlap_tree_mx); - return; - } - - VERIFY(!(ps->ps_flags & MD_MPS_ON_OVERLAP)); - - do { - ps1 = avl_find(&un->un_overlap_root, ps, &where); - if (ps1 == NULL) { - /* - * The candidate range does not overlap with any - * range in the tree. Insert it and be done. - */ - avl_insert(&un->un_overlap_root, ps, where); - ps->ps_flags |= MD_MPS_ON_OVERLAP; - } else { - /* - * The candidate range would overlap. Set the flag - * indicating we need to be woken up, and sleep - * until another thread removes a range. If upon - * waking up we find this mps was put on the tree - * by another thread, the loop terminates. - */ - un->un_overlap_tree_flag = 1; - cv_wait(&un->un_overlap_tree_cv, - &un->un_overlap_tree_mx); - } - } while (!(ps->ps_flags & MD_MPS_ON_OVERLAP)); - mutex_exit(&un->un_overlap_tree_mx); -} - -/* - * This function is called from mirror_done to check whether any pages have - * been modified while a mirrored write was in progress. Returns 0 if - * all pages associated with bp are clean, 1 otherwise. - */ -static int -any_pages_dirty(struct buf *bp) -{ - int rval; - - rval = biomodified(bp); - if (rval == -1) - rval = 0; - - return (rval); -} - -#define MAX_EXTRAS 10 - -void -mirror_commit( - mm_unit_t *un, - int smmask, - mddb_recid_t *extras -) -{ - mm_submirror_t *sm; - md_unit_t *su; - int i; - - /* 2=mirror,null id */ - mddb_recid_t recids[NMIRROR+2+MAX_EXTRAS]; - - int ri = 0; - - if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) - return; - - /* Add two, this includes the mirror unit and the null recid */ - if (extras != NULL) { - int nrecids = 0; - while (extras[nrecids] != 0) { - nrecids++; - } - ASSERT(nrecids <= MAX_EXTRAS); - } - - if (un != NULL) - recids[ri++] = un->c.un_record_id; - for (i = 0; i < NMIRROR; i++) { - if (!(smmask & SMI2BIT(i))) - continue; - sm = &un->un_sm[i]; - if (!SMS_IS(sm, SMS_INUSE)) - continue; - if (md_getmajor(sm->sm_dev) != md_major) - continue; - su = MD_UNIT(md_getminor(sm->sm_dev)); - recids[ri++] = su->c.un_record_id; - } - - if (extras != NULL) - while (*extras != 0) { - recids[ri++] = *extras; - extras++; - } - - if (ri == 0) - return; - recids[ri] = 0; - - /* - * Ok to hold ioctl lock across record commit to mddb as - * long as the record(s) being committed aren't resync records. - */ - mddb_commitrecs_wrapper(recids); -} - - -/* - * This routine is used to set a bit in the writable_bm bitmap - * which represents each submirror in a metamirror which - * is writable. The first writable submirror index is assigned - * to the sm_index. The number of writable submirrors are returned in nunits. - * - * This routine returns the submirror's unit number. - */ - -static void -select_write_units(struct mm_unit *un, md_mps_t *ps) -{ - - int i; - unsigned writable_bm = 0; - unsigned nunits = 0; - - for (i = 0; i < NMIRROR; i++) { - if (SUBMIRROR_IS_WRITEABLE(un, i)) { - /* set bit of all writable units */ - writable_bm |= SMI2BIT(i); - nunits++; - } - } - ps->ps_writable_sm = writable_bm; - ps->ps_active_cnt = nunits; - ps->ps_current_sm = 0; -} - -static -unsigned -select_write_after_read_units(struct mm_unit *un, md_mps_t *ps) -{ - - int i; - unsigned writable_bm = 0; - unsigned nunits = 0; - - for (i = 0; i < NMIRROR; i++) { - if (SUBMIRROR_IS_WRITEABLE(un, i) && - un->un_sm[i].sm_flags & MD_SM_RESYNC_TARGET) { - writable_bm |= SMI2BIT(i); - nunits++; - } - } - if ((writable_bm & ps->ps_allfrom_sm) != 0) { - writable_bm &= ~ps->ps_allfrom_sm; - nunits--; - } - ps->ps_writable_sm = writable_bm; - ps->ps_active_cnt = nunits; - ps->ps_current_sm = 0; - return (nunits); -} - -static md_dev64_t -select_read_unit( - mm_unit_t *un, - diskaddr_t blkno, - u_longlong_t reqcount, - u_longlong_t *cando, - int must_be_opened, - md_m_shared_t **shared, - md_mcs_t *cs) -{ - int i; - md_m_shared_t *s; - uint_t lasterrcnt = 0; - md_dev64_t dev = 0; - u_longlong_t cnt; - u_longlong_t mincnt; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - mdi_unit_t *ui; - - mincnt = reqcount; - for (i = 0; i < NMIRROR; i++) { - if (!SUBMIRROR_IS_READABLE(un, i)) - continue; - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - cnt = reqcount; - - /* - * If the current submirror is marked as inaccessible, do not - * try to access it. - */ - ui = MDI_UNIT(getminor(expldev(sm->sm_dev))); - (void) md_unit_readerlock(ui); - if (ui->ui_tstate & MD_INACCESSIBLE) { - md_unit_readerexit(ui); - continue; - } - md_unit_readerexit(ui); - - s = (md_m_shared_t *)(*(smic->sm_shared_by_blk)) - (sm->sm_dev, sm, blkno, &cnt); - - if (must_be_opened && !(s->ms_flags & MDM_S_ISOPEN)) - continue; - if (s->ms_state == CS_OKAY) { - *cando = cnt; - if (shared != NULL) - *shared = s; - - if (un->un_sm[i].sm_flags & MD_SM_FAILFAST && - cs != NULL) { - cs->cs_buf.b_flags |= B_FAILFAST; - } - - return (un->un_sm[i].sm_dev); - } - if (s->ms_state != CS_LAST_ERRED) - continue; - - /* don't use B_FAILFAST since we're Last Erred */ - - if (mincnt > cnt) - mincnt = cnt; - if (s->ms_lasterrcnt > lasterrcnt) { - lasterrcnt = s->ms_lasterrcnt; - if (shared != NULL) - *shared = s; - dev = un->un_sm[i].sm_dev; - } - } - *cando = mincnt; - return (dev); -} - -/* - * Given a 32-bit bitmap, this routine will return the bit number - * of the nth bit set. The nth bit set is passed via the index integer. - * - * This routine is used to run through the writable submirror bitmap - * and starting all of the writes. See the value returned is the - * index to appropriate submirror structure, in the md_sm - * array for metamirrors. - */ -static int -md_find_nth_unit(uint_t mask, int index) -{ - int bit, nfound; - - for (bit = -1, nfound = -1; nfound != index; bit++) { - ASSERT(mask != 0); - nfound += (mask & 1); - mask >>= 1; - } - return (bit); -} - -static int -fast_select_read_unit(md_mps_t *ps, md_mcs_t *cs) -{ - mm_unit_t *un; - buf_t *bp; - int i; - unsigned nunits = 0; - int iunit; - uint_t running_bm = 0; - uint_t sm_index; - - bp = &cs->cs_buf; - un = ps->ps_un; - - for (i = 0; i < NMIRROR; i++) { - if (!SMS_BY_INDEX_IS(un, i, SMS_RUNNING)) - continue; - running_bm |= SMI2BIT(i); - nunits++; - } - if (nunits == 0) - return (1); - - /* - * For directed mirror read (DMR) we only use the specified side and - * do not compute the source of the read. - * If we're running with MD_MPS_DIRTY_RD set we always return the - * first mirror side (this prevents unnecessary ownership switching). - * Otherwise we return the submirror according to the mirror read option - */ - if (ps->ps_flags & MD_MPS_DMR) { - sm_index = un->un_dmr_last_read; - } else if (ps->ps_flags & MD_MPS_DIRTY_RD) { - sm_index = md_find_nth_unit(running_bm, 0); - } else { - /* Normal (non-DMR) operation */ - switch (un->un_read_option) { - case RD_GEOMETRY: - iunit = (int)(bp->b_lblkno / - howmany(un->c.un_total_blocks, nunits)); - sm_index = md_find_nth_unit(running_bm, iunit); - break; - case RD_FIRST: - sm_index = md_find_nth_unit(running_bm, 0); - break; - case RD_LOAD_BAL: - /* this is intentional to fall into the default */ - default: - un->un_last_read = (un->un_last_read + 1) % nunits; - sm_index = md_find_nth_unit(running_bm, - un->un_last_read); - break; - } - } - bp->b_edev = md_dev64_to_dev(un->un_sm[sm_index].sm_dev); - ps->ps_allfrom_sm = SMI2BIT(sm_index); - - if (un->un_sm[sm_index].sm_flags & MD_SM_FAILFAST) { - bp->b_flags |= B_FAILFAST; - } - - return (0); -} - -static -int -mirror_are_submirrors_available(mm_unit_t *un) -{ - int i; - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev = un->un_sm[i].sm_dev; - - if ((!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) || - md_getmajor(tmpdev) != md_major) - continue; - - if ((MD_MIN2SET(md_getminor(tmpdev)) >= md_nsets) || - (MD_MIN2UNIT(md_getminor(tmpdev)) >= md_nunits)) - return (0); - - if (MDI_UNIT(md_getminor(tmpdev)) == NULL) - return (0); - } - return (1); -} - -void -build_submirror(mm_unit_t *un, int i, int snarfing) -{ - struct mm_submirror *sm; - struct mm_submirror_ic *smic; - md_unit_t *su; - set_t setno; - - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - sm->sm_flags = 0; /* sometime we may need to do more here */ - - setno = MD_UN2SET(un); - - if (!SMS_IS(sm, SMS_INUSE)) - return; - if (snarfing) { - sm->sm_dev = md_getdevnum(setno, mddb_getsidenum(setno), - sm->sm_key, MD_NOTRUST_DEVT); - } else { - if (md_getmajor(sm->sm_dev) == md_major) { - su = MD_UNIT(md_getminor(sm->sm_dev)); - un->c.un_flag |= (su->c.un_flag & MD_LABELED); - /* submirror can no longer be soft partitioned */ - MD_CAPAB(su) &= (~MD_CAN_SP); - } - } - smic->sm_shared_by_blk = md_get_named_service(sm->sm_dev, - 0, "shared by blk", 0); - smic->sm_shared_by_indx = md_get_named_service(sm->sm_dev, - 0, "shared by indx", 0); - smic->sm_get_component_count = (int (*)())md_get_named_service( - sm->sm_dev, 0, "get component count", 0); - smic->sm_get_bcss = (int (*)())md_get_named_service(sm->sm_dev, 0, - "get block count skip size", 0); - sm->sm_state &= ~SMS_IGNORE; - if (SMS_IS(sm, SMS_OFFLINE)) - MD_STATUS(un) |= MD_UN_OFFLINE_SM; - md_set_parent(sm->sm_dev, MD_SID(un)); -} - -static void -mirror_cleanup(mm_unit_t *un) -{ - mddb_recid_t recid; - int smi; - sv_dev_t sv[NMIRROR]; - int nsv = 0; - - /* - * If a MN diskset and this node is not the master, do - * not delete any records on snarf of the mirror records. - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && - md_set[MD_UN2SET(un)].s_am_i_master == 0) { - return; - } - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - sv[nsv].setno = MD_UN2SET(un); - sv[nsv++].key = un->un_sm[smi].sm_key; - } - - recid = un->un_rr_dirty_recid; - mddb_deleterec_wrapper(un->c.un_record_id); - if (recid > 0) - mddb_deleterec_wrapper(recid); - - md_rem_names(sv, nsv); -} - -/* - * Comparison function for the avl tree which tracks - * outstanding writes on submirrors. - * - * Returns: - * -1: ps1 < ps2 - * 0: ps1 and ps2 overlap - * 1: ps1 > ps2 - */ -static int -mirror_overlap_compare(const void *p1, const void *p2) -{ - const md_mps_t *ps1 = (md_mps_t *)p1; - const md_mps_t *ps2 = (md_mps_t *)p2; - - if (ps1->ps_firstblk < ps2->ps_firstblk) { - if (ps1->ps_lastblk >= ps2->ps_firstblk) - return (0); - return (-1); - } - - if (ps1->ps_firstblk > ps2->ps_firstblk) { - if (ps1->ps_firstblk <= ps2->ps_lastblk) - return (0); - return (1); - } - - return (0); -} - -/* - * Collapse any sparse submirror entries snarfed from the on-disk replica. - * Only the in-core entries are updated. The replica will be updated on-disk - * when the in-core replica is committed on shutdown of the SVM subsystem. - */ -static void -collapse_submirrors(mm_unit_t *un) -{ - int smi, nremovals, smiremove; - mm_submirror_t *sm, *new_sm, *old_sm; - mm_submirror_ic_t *smic; - int nsmidx = un->un_nsm - 1; - -rescan: - nremovals = 0; - smiremove = -1; - - for (smi = 0; smi <= nsmidx; smi++) { - sm = &un->un_sm[smi]; - - /* - * Check to see if this submirror is marked as in-use. - * If it isn't then it is a potential sparse entry and - * may need to be cleared from the configuration. - * The records should _already_ have been cleared by the - * original mirror_detach() code, but we need to shuffle - * any NULL entries in un_sm[] to the end of the array. - * Any NULL un_smic[] entries need to be reset to the underlying - * submirror/slice accessor functions. - */ - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - nremovals++; - smiremove = smi; - break; - } - } - - if (nremovals == 0) { - /* - * Ensure that we have a matching contiguous set of un_smic[] - * entries for the corresponding un_sm[] entries - */ - for (smi = 0; smi <= nsmidx; smi++) { - smic = &un->un_smic[smi]; - sm = &un->un_sm[smi]; - - smic->sm_shared_by_blk = - md_get_named_service(sm->sm_dev, 0, - "shared by_blk", 0); - smic->sm_shared_by_indx = - md_get_named_service(sm->sm_dev, 0, - "shared by indx", 0); - smic->sm_get_component_count = - (int (*)())md_get_named_service(sm->sm_dev, 0, - "get component count", 0); - smic->sm_get_bcss = - (int (*)())md_get_named_service(sm->sm_dev, 0, - "get block count skip size", 0); - } - return; - } - - /* - * Reshuffle the submirror devices so that we do not have a dead record - * in the middle of the array. Once we've done this we need to rescan - * the mirror to check for any other holes. - */ - for (smi = 0; smi < NMIRROR; smi++) { - if (smi < smiremove) - continue; - if (smi > smiremove) { - old_sm = &un->un_sm[smi]; - new_sm = &un->un_sm[smi - 1]; - bcopy(old_sm, new_sm, sizeof (mm_submirror_t)); - bzero(old_sm, sizeof (mm_submirror_t)); - } - } - - /* - * Now we need to rescan the array to find the next potential dead - * entry. - */ - goto rescan; -} - -/* Return a -1 if optimized record unavailable and set should be released */ -int -mirror_build_incore(mm_unit_t *un, int snarfing) -{ - int i; - - if (MD_STATUS(un) & MD_UN_BEING_RESET) { - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); - return (1); - } - - if (mirror_are_submirrors_available(un) == 0) - return (1); - - if (MD_UNIT(MD_SID(un)) != NULL) - return (0); - - MD_STATUS(un) = 0; - - /* pre-4.1 didn't define CAN_META_CHILD capability */ - MD_CAPAB(un) = MD_CAN_META_CHILD | MD_CAN_PARENT | MD_CAN_SP; - - un->un_overlap_tree_flag = 0; - avl_create(&un->un_overlap_root, mirror_overlap_compare, - sizeof (md_mps_t), offsetof(md_mps_t, ps_overlap_node)); - - /* - * We need to collapse any sparse submirror entries into a non-sparse - * array. This is to cover the case where we have an old replica image - * which has not been updated (i.e. snarfed) since being modified. - * The new code expects all submirror access to be sequential (i.e. - * both the un_sm[] and un_smic[] entries correspond to non-empty - * submirrors. - */ - - collapse_submirrors(un); - - for (i = 0; i < NMIRROR; i++) - build_submirror(un, i, snarfing); - - if (unit_setup_resync(un, snarfing) != 0) { - if (snarfing) { - mddb_setrecprivate(un->c.un_record_id, MD_PRV_GOTIT); - /* - * If a MN set and set is not stale, then return -1 - * which will force the caller to unload the set. - * The MN diskset nodes will return failure if - * unit_setup_resync fails so that nodes won't - * get out of sync. - * - * If set is STALE, the master node can't allocate - * a resync record (if needed), but node needs to - * join the set so that user can delete broken mddbs. - * So, if set is STALE, just continue on. - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && - !(md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)) { - return (-1); - } - } else - return (1); - } - - mutex_init(&un->un_overlap_tree_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_overlap_tree_cv, NULL, CV_DEFAULT, NULL); - - un->un_suspend_wr_flag = 0; - mutex_init(&un->un_suspend_wr_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_suspend_wr_cv, NULL, CV_DEFAULT, NULL); - - /* - * Allocate mutexes for mirror-owner and resync-owner changes. - * All references to the owner message state field must be guarded - * by this mutex. - */ - mutex_init(&un->un_owner_mx, NULL, MUTEX_DEFAULT, NULL); - - /* - * Allocate mutex and condvar for resync thread manipulation. These - * will be used by mirror_resync_unit/mirror_ioctl_resync - */ - mutex_init(&un->un_rs_thread_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_rs_thread_cv, NULL, CV_DEFAULT, NULL); - - /* - * Allocate mutex and condvar for resync progress thread manipulation. - * This allows resyncs to be continued across an intervening reboot. - */ - mutex_init(&un->un_rs_progress_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_rs_progress_cv, NULL, CV_DEFAULT, NULL); - - /* - * Allocate mutex and condvar for Directed Mirror Reads (DMR). This - * provides synchronization between a user-ioctl and the resulting - * strategy() call that performs the read(). - */ - mutex_init(&un->un_dmr_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_dmr_cv, NULL, CV_DEFAULT, NULL); - - /* - * Allocate rwlocks for un_pernode_dirty_bm accessing. - */ - for (i = 0; i < MD_MNMAXSIDES; i++) { - rw_init(&un->un_pernode_dirty_mx[i], NULL, RW_DEFAULT, NULL); - } - - /* place various information in the in-core data structures */ - md_nblocks_set(MD_SID(un), un->c.un_total_blocks); - MD_UNIT(MD_SID(un)) = un; - - return (0); -} - - -void -reset_mirror(struct mm_unit *un, minor_t mnum, int removing) -{ - mddb_recid_t recid, vtoc_id; - size_t bitcnt; - size_t shortcnt; - int smi; - sv_dev_t sv[NMIRROR]; - int nsv = 0; - uint_t bits = 0; - minor_t selfid; - md_unit_t *su; - int i; - - md_destroy_unit_incore(mnum, &mirror_md_ops); - - shortcnt = un->un_rrd_num * sizeof (short); - bitcnt = howmany(un->un_rrd_num, NBBY); - - if (un->un_outstanding_writes) - kmem_free((caddr_t)un->un_outstanding_writes, shortcnt); - if (un->un_goingclean_bm) - kmem_free((caddr_t)un->un_goingclean_bm, bitcnt); - if (un->un_goingdirty_bm) - kmem_free((caddr_t)un->un_goingdirty_bm, bitcnt); - if (un->un_resync_bm) - kmem_free((caddr_t)un->un_resync_bm, bitcnt); - if (un->un_pernode_dirty_sum) - kmem_free((caddr_t)un->un_pernode_dirty_sum, un->un_rrd_num); - - /* - * Destroy the taskq for deferred processing of DRL clean requests. - * This taskq will only be present for Multi Owner mirrors. - */ - if (un->un_drl_task != NULL) - ddi_taskq_destroy(un->un_drl_task); - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - /* - * Attempt release of its minor node - */ - md_remove_minor_node(mnum); - - if (!removing) - return; - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - /* reallow soft partitioning of submirror and reset parent */ - su = MD_UNIT(md_getminor(un->un_sm[smi].sm_dev)); - MD_CAPAB(su) |= MD_CAN_SP; - md_reset_parent(un->un_sm[smi].sm_dev); - reset_comp_states(&un->un_sm[smi], &un->un_smic[smi]); - - sv[nsv].setno = MD_MIN2SET(mnum); - sv[nsv++].key = un->un_sm[smi].sm_key; - bits |= SMI2BIT(smi); - } - - MD_STATUS(un) |= MD_UN_BEING_RESET; - recid = un->un_rr_dirty_recid; - vtoc_id = un->c.un_vtoc_id; - selfid = MD_SID(un); - - mirror_commit(un, bits, 0); - - avl_destroy(&un->un_overlap_root); - - /* Destroy all mutexes and condvars before returning. */ - mutex_destroy(&un->un_suspend_wr_mx); - cv_destroy(&un->un_suspend_wr_cv); - mutex_destroy(&un->un_overlap_tree_mx); - cv_destroy(&un->un_overlap_tree_cv); - mutex_destroy(&un->un_owner_mx); - mutex_destroy(&un->un_rs_thread_mx); - cv_destroy(&un->un_rs_thread_cv); - mutex_destroy(&un->un_rs_progress_mx); - cv_destroy(&un->un_rs_progress_cv); - mutex_destroy(&un->un_dmr_mx); - cv_destroy(&un->un_dmr_cv); - - for (i = 0; i < MD_MNMAXSIDES; i++) { - rw_destroy(&un->un_pernode_dirty_mx[i]); - if (un->un_pernode_dirty_bm[i]) - kmem_free((caddr_t)un->un_pernode_dirty_bm[i], bitcnt); - } - - /* - * Remove self from the namespace - */ - if (un->c.un_revision & MD_FN_META_DEV) { - (void) md_rem_selfname(un->c.un_self_id); - } - - /* This frees the unit structure. */ - mddb_deleterec_wrapper(un->c.un_record_id); - - if (recid != 0) - mddb_deleterec_wrapper(recid); - - /* Remove the vtoc, if present */ - if (vtoc_id) - mddb_deleterec_wrapper(vtoc_id); - - md_rem_names(sv, nsv); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, - MD_MIN2SET(selfid), selfid); -} - -int -mirror_internal_open( - minor_t mnum, - int flag, - int otyp, - int md_oflags, - IOLOCK *lockp /* can be NULL */ -) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - int err = 0; - -tryagain: - /* single thread */ - if (lockp) { - /* - * If ioctl lock is held, use openclose_enter - * routine that will set the ioctl flag when - * grabbing the readerlock. - */ - (void) md_ioctl_openclose_enter(lockp, ui); - } else { - (void) md_unit_openclose_enter(ui); - } - - /* - * The mirror_open_all_devs routine may end up sending a STATE_UPDATE - * message in a MN diskset and this requires that the openclose - * lock is dropped in order to send this message. So, another - * flag (MD_UL_OPENINPROGRESS) is used to keep another thread from - * attempting an open while this thread has an open in progress. - * Call the *_lh version of the lock exit routines since the ui_mx - * mutex must be held from checking for OPENINPROGRESS until - * after the cv_wait call. - */ - mutex_enter(&ui->ui_mx); - if (ui->ui_lock & MD_UL_OPENINPROGRESS) { - if (lockp) { - (void) md_ioctl_openclose_exit_lh(lockp); - } else { - md_unit_openclose_exit_lh(ui); - } - cv_wait(&ui->ui_cv, &ui->ui_mx); - mutex_exit(&ui->ui_mx); - goto tryagain; - } - - ui->ui_lock |= MD_UL_OPENINPROGRESS; - mutex_exit(&ui->ui_mx); - - /* open devices, if necessary */ - if (! md_unit_isopen(ui) || (ui->ui_tstate & MD_INACCESSIBLE)) { - if ((err = mirror_open_all_devs(mnum, md_oflags, lockp)) != 0) - goto out; - } - - /* count open */ - if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) - goto out; - - /* unlock, return success */ -out: - mutex_enter(&ui->ui_mx); - ui->ui_lock &= ~MD_UL_OPENINPROGRESS; - mutex_exit(&ui->ui_mx); - - if (lockp) { - /* - * If ioctl lock is held, use openclose_exit - * routine that will clear the lockp reader flag. - */ - (void) md_ioctl_openclose_exit(lockp); - } else { - md_unit_openclose_exit(ui); - } - return (err); -} - -int -mirror_internal_close( - minor_t mnum, - int otyp, - int md_cflags, - IOLOCK *lockp /* can be NULL */ -) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - mm_unit_t *un; - int err = 0; - - /* single thread */ - if (lockp) { - /* - * If ioctl lock is held, use openclose_enter - * routine that will set the ioctl flag when - * grabbing the readerlock. - */ - un = (mm_unit_t *)md_ioctl_openclose_enter(lockp, ui); - } else { - un = (mm_unit_t *)md_unit_openclose_enter(ui); - } - - /* count closed */ - if ((err = md_unit_decopen(mnum, otyp)) != 0) - goto out; - - /* close devices, if necessary */ - if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { - /* - * Clean up dirty bitmap for this unit. Do this - * before closing the underlying devices to avoid - * race conditions with reset_mirror() as a - * result of a 'metaset -r' command running in - * parallel. This might cause deallocation of - * dirty region bitmaps; with underlying metadevices - * in place this can't happen. - * Don't do this if a MN set and ABR not set - */ - if (new_resync && !(MD_STATUS(un) & MD_UN_KEEP_DIRTY)) { - if (!MD_MNSET_SETNO(MD_UN2SET(un)) || - !(ui->ui_tstate & MD_ABR_CAP)) - mirror_process_unit_resync(un); - } - (void) mirror_close_all_devs(un, md_cflags); - - /* - * For a MN set with transient capabilities (eg ABR/DMR) set, - * clear these capabilities on the last open in the cluster. - * To do this we send a message to all nodes to see of the - * device is open. - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && - (ui->ui_tstate & (MD_ABR_CAP|MD_DMR_CAP))) { - if (lockp) { - (void) md_ioctl_openclose_exit(lockp); - } else { - md_unit_openclose_exit(ui); - } - - /* - * if we are in the context of an ioctl, drop the - * ioctl lock. - * Otherwise, no other locks should be held. - */ - if (lockp) { - IOLOCK_RETURN_RELEASE(0, lockp); - } - - mdmn_clear_all_capabilities(mnum); - - /* if dropped the lock previously, regain it */ - if (lockp) { - IOLOCK_RETURN_REACQUIRE(lockp); - } - return (0); - } - /* unlock and return success */ - } -out: - /* Call whether lockp is NULL or not. */ - if (lockp) { - md_ioctl_openclose_exit(lockp); - } else { - md_unit_openclose_exit(ui); - } - return (err); -} - -/* - * When a component has completed resyncing and is now ok, check if the - * corresponding component in the other submirrors is in the Last Erred - * state. If it is, we want to change that to the Erred state so we stop - * using that component and start using this good component instead. - * - * This is called from set_sm_comp_state and recursively calls - * set_sm_comp_state if it needs to change the Last Erred state. - */ -static void -reset_lasterred(mm_unit_t *un, int smi, mddb_recid_t *extras, uint_t flags, - IOLOCK *lockp) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int ci; - int i; - int compcnt; - int changed = 0; - - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - if (!SMS_IS(sm, SMS_INUSE)) - continue; - - /* ignore the submirror that we just made ok */ - if (i == smi) - continue; - - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, un); - for (ci = 0; ci < compcnt; ci++) { - md_m_shared_t *shared; - - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - - if ((shared->ms_state & CS_LAST_ERRED) && - !mirror_other_sources(un, i, ci, 1)) { - - set_sm_comp_state(un, i, ci, CS_ERRED, extras, - flags, lockp); - changed = 1; - } - } - } - - /* maybe there is a hotspare for this newly erred component */ - if (changed) { - set_t setno; - - setno = MD_UN2SET(un); - if (MD_MNSET_SETNO(setno)) { - send_poke_hotspares(setno); - } else { - (void) poke_hotspares(); - } - } -} - -/* - * set_sm_comp_state - * - * Set the state of a submirror component to the specified new state. - * If the mirror is in a multi-node set, send messages to all nodes to - * block all writes to the mirror and then update the state and release the - * writes. These messages are only sent if MD_STATE_XMIT is set in flags. - * MD_STATE_XMIT will be unset in 2 cases: - * 1. When the state is changed to CS_RESYNC as this state change - * will already have been updated on each node by the processing of the - * distributed metasync command, hence no need to xmit. - * 2. When the state is change to CS_OKAY after a resync has completed. Again - * the resync completion will already have been processed on each node by - * the processing of the MD_MN_MSG_RESYNC_PHASE_DONE message for a component - * resync, hence no need to xmit. - * - * In case we are called from the updates of a watermark, - * (then MD_STATE_WMUPDATE will be set in the ps->flags) this is due to - * a metainit or similar. In this case the message that we sent to propagate - * the state change must not be a class1 message as that would deadlock with - * the metainit command that is still being processed. - * This we achieve by creating a class2 message MD_MN_MSG_STATE_UPDATE2 - * instead. This also makes the submessage generator to create a class2 - * submessage rather than a class1 (which would also block) - * - * On entry, unit_writerlock is held - * If MD_STATE_OCHELD is set in flags, then unit_openclose lock is - * also held. - */ -void -set_sm_comp_state( - mm_unit_t *un, - int smi, - int ci, - int newstate, - mddb_recid_t *extras, - uint_t flags, - IOLOCK *lockp -) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int origstate; - void (*get_dev)(); - ms_cd_info_t cd; - char devname[MD_MAX_CTDLEN]; - int err; - set_t setno = MD_UN2SET(un); - md_mn_msg_stch_t stchmsg; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - md_mn_kresult_t *kresult; - int rval; - uint_t msgflags; - md_mn_msgtype_t msgtype; - int save_lock = 0; - mdi_unit_t *ui_sm; - int nretries = 0; - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - - /* If we have a real error status then turn off MD_INACCESSIBLE. */ - ui_sm = MDI_UNIT(getminor(md_dev64_to_dev(sm->sm_dev))); - if (newstate & (CS_ERRED | CS_RESYNC | CS_LAST_ERRED) && - ui_sm->ui_tstate & MD_INACCESSIBLE) { - ui_sm->ui_tstate &= ~MD_INACCESSIBLE; - } - - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - origstate = shared->ms_state; - - /* - * If the new state is an error and the old one wasn't, generate - * a console message. We do this before we send the state to other - * nodes in a MN set because the state change may change the component - * name if a hotspare is allocated. - */ - if ((! (origstate & (CS_ERRED|CS_LAST_ERRED))) && - (newstate & (CS_ERRED|CS_LAST_ERRED))) { - - get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - - err = md_getdevname(setno, mddb_getsidenum(setno), 0, - cd.cd_dev, devname, sizeof (devname)); - - if (err == ENOENT) { - (void) md_devname(setno, cd.cd_dev, devname, - sizeof (devname)); - } - - cmn_err(CE_WARN, "md: %s: %s needs maintenance", - md_shortname(md_getminor(sm->sm_dev)), devname); - - if (newstate & CS_LAST_ERRED) { - cmn_err(CE_WARN, "md: %s: %s last erred", - md_shortname(md_getminor(sm->sm_dev)), - devname); - - } else if (shared->ms_flags & MDM_S_ISOPEN) { - /* - * Close the broken device and clear the open flag on - * it. Closing the device means the RCM framework will - * be able to unconfigure the device if required. - * - * We have to check that the device is open, otherwise - * the first open on it has resulted in the error that - * is being processed and the actual cd.cd_dev will be - * NODEV64. - * - * If this is a multi-node mirror, then the multinode - * state checks following this code will cause the - * slave nodes to close the mirror in the function - * mirror_set_state(). - */ - md_layered_close(cd.cd_dev, MD_OFLG_NULL); - shared->ms_flags &= ~MDM_S_ISOPEN; - } - - } else if ((origstate & CS_LAST_ERRED) && (newstate & CS_ERRED) && - (shared->ms_flags & MDM_S_ISOPEN)) { - /* - * Similar to logic above except no log messages since we - * are just transitioning from Last Erred to Erred. - */ - get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - - md_layered_close(cd.cd_dev, MD_OFLG_NULL); - shared->ms_flags &= ~MDM_S_ISOPEN; - } - - if ((MD_MNSET_SETNO(setno)) && (origstate != newstate) && - (flags & MD_STATE_XMIT) && !(ui->ui_tstate & MD_ERR_PENDING)) { - /* - * For a multi-node mirror, send the state change to the - * master, which broadcasts to all nodes, including this - * one. Once the message is received, the state is set - * in-core and the master commits the change to disk. - * There is a case, comp_replace, where this function - * can be called from within an ioctl and therefore in this - * case, as the ioctl will already be called on each node, - * there is no need to xmit the state change to the master for - * distribution to the other nodes. MD_STATE_XMIT flag is used - * to indicate whether a xmit is required. The mirror's - * transient state is set to MD_ERR_PENDING to avoid sending - * multiple messages. - */ - if (newstate & (CS_ERRED|CS_LAST_ERRED)) - ui->ui_tstate |= MD_ERR_PENDING; - - /* - * Send a state update message to all nodes. This message - * will generate 2 submessages, the first one to suspend - * all writes to the mirror and the second to update the - * state and resume writes. - */ - stchmsg.msg_stch_mnum = un->c.un_self_id; - stchmsg.msg_stch_sm = smi; - stchmsg.msg_stch_comp = ci; - stchmsg.msg_stch_new_state = newstate; - stchmsg.msg_stch_hs_id = shared->ms_hs_id; -#ifdef DEBUG - if (mirror_debug_flag) - printf("send set state, %x, %x, %x, %x, %x\n", - stchmsg.msg_stch_mnum, stchmsg.msg_stch_sm, - stchmsg.msg_stch_comp, stchmsg.msg_stch_new_state, - stchmsg.msg_stch_hs_id); -#endif - if (flags & MD_STATE_WMUPDATE) { - msgtype = MD_MN_MSG_STATE_UPDATE2; - /* - * When coming from an update of watermarks, there - * must already be a message logged that triggered - * this action. So, no need to log this message, too. - */ - msgflags = MD_MSGF_NO_LOG; - } else { - msgtype = MD_MN_MSG_STATE_UPDATE; - msgflags = MD_MSGF_DEFAULT_FLAGS; - } - - /* - * If we are in the context of an ioctl, drop the ioctl lock. - * lockp holds the list of locks held. - * - * Otherwise, increment the appropriate reacquire counters. - * If openclose lock is *held, then must reacquire reader - * lock before releasing the openclose lock. - * Do not drop the ARRAY_WRITER lock as we may not be able - * to reacquire it. - */ - if (lockp) { - if (lockp->l_flags & MD_ARRAY_WRITER) { - save_lock = MD_ARRAY_WRITER; - lockp->l_flags &= ~MD_ARRAY_WRITER; - } else if (lockp->l_flags & MD_ARRAY_READER) { - save_lock = MD_ARRAY_READER; - lockp->l_flags &= ~MD_ARRAY_READER; - } - IOLOCK_RETURN_RELEASE(0, lockp); - } else { - if (flags & MD_STATE_OCHELD) { - md_unit_writerexit(ui); - (void) md_unit_readerlock(ui); - md_unit_openclose_exit(ui); - } else { - md_unit_writerexit(ui); - } - } - - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); -sscs_msg: - rval = mdmn_ksend_message(setno, msgtype, msgflags, 0, - (char *)&stchmsg, sizeof (stchmsg), kresult); - - if (!MDMN_KSEND_MSG_OK(rval, kresult)) { - mdmn_ksend_show_error(rval, kresult, "STATE UPDATE"); - /* If we're shutting down already, pause things here. */ - if (kresult->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd is now available; retry the message - * one time. If that fails we fall through and - * panic as the system is in an unexpected state - */ - if (nretries++ == 0) - goto sscs_msg; - } - cmn_err(CE_PANIC, - "ksend_message failure: STATE_UPDATE"); - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - - /* if dropped the lock previously, regain it */ - if (lockp) { - IOLOCK_RETURN_REACQUIRE(lockp); - lockp->l_flags |= save_lock; - } else { - /* - * Reacquire dropped locks and update acquirecnts - * appropriately. - */ - if (flags & MD_STATE_OCHELD) { - /* - * openclose also grabs readerlock. - */ - (void) md_unit_openclose_enter(ui); - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - } else { - (void) md_unit_writerlock(ui); - } - } - - ui->ui_tstate &= ~MD_ERR_PENDING; - } else { - shared->ms_state = newstate; - uniqtime32(&shared->ms_timestamp); - - if (newstate == CS_ERRED) - shared->ms_flags |= MDM_S_NOWRITE; - else - shared->ms_flags &= ~MDM_S_NOWRITE; - - shared->ms_flags &= ~MDM_S_IOERR; - un->un_changecnt++; - shared->ms_lasterrcnt = un->un_changecnt; - - mirror_set_sm_state(sm, smic, SMS_RUNNING, 0); - mirror_commit(un, SMI2BIT(smi), extras); - } - - if ((origstate & CS_RESYNC) && (newstate & CS_OKAY)) { - /* - * Resetting the Last Erred state will recursively call back - * into this function (set_sm_comp_state) to update the state. - */ - reset_lasterred(un, smi, extras, flags, lockp); - } -} - -static int -find_another_logical( - mm_unit_t *un, - mm_submirror_t *esm, - diskaddr_t blk, - u_longlong_t cnt, - int must_be_open, - int state, - int err_cnt) -{ - u_longlong_t cando; - md_dev64_t dev; - md_m_shared_t *s; - - esm->sm_state |= SMS_IGNORE; - while (cnt != 0) { - u_longlong_t mcnt; - - mcnt = MIN(cnt, lbtodb(1024 * 1024 * 1024)); /* 1 Gig Blks */ - - dev = select_read_unit(un, blk, mcnt, &cando, - must_be_open, &s, NULL); - if (dev == (md_dev64_t)0) - break; - - if ((state == CS_LAST_ERRED) && - (s->ms_state == CS_LAST_ERRED) && - (err_cnt > s->ms_lasterrcnt)) - break; - - cnt -= cando; - blk += cando; - } - esm->sm_state &= ~SMS_IGNORE; - return (cnt != 0); -} - -int -mirror_other_sources(mm_unit_t *un, int smi, int ci, int must_be_open) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - size_t count; - diskaddr_t block; - u_longlong_t skip; - u_longlong_t size; - md_dev64_t dev; - int cnt; - md_m_shared_t *s; - int not_found; - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - dev = sm->sm_dev; - - /* - * Make sure every component of the submirror - * has other sources. - */ - if (ci < 0) { - /* Find the highest lasterrcnt */ - cnt = (*(smic->sm_get_component_count))(dev, sm); - for (ci = 0; ci < cnt; ci++) { - not_found = mirror_other_sources(un, smi, ci, - must_be_open); - if (not_found) - return (1); - } - return (0); - } - - /* - * Make sure this component has other sources - */ - (void) (*(smic->sm_get_bcss)) - (dev, sm, ci, &block, &count, &skip, &size); - - if (count == 0) - return (1); - - s = (md_m_shared_t *)(*(smic->sm_shared_by_indx))(dev, sm, ci); - - while (count--) { - if (block >= un->c.un_total_blocks) - return (0); - - if ((block + size) > un->c.un_total_blocks) - size = un->c.un_total_blocks - block; - - not_found = find_another_logical(un, sm, block, size, - must_be_open, s->ms_state, s->ms_lasterrcnt); - if (not_found) - return (1); - - block += size + skip; - } - return (0); -} - -static void -finish_error(md_mps_t *ps) -{ - struct buf *pb; - mm_unit_t *un; - mdi_unit_t *ui; - uint_t new_str_flags; - - pb = ps->ps_bp; - un = ps->ps_un; - ui = ps->ps_ui; - - /* - * Must flag any error to the resync originator if we're performing - * a Write-after-Read. This corresponds to an i/o error on a resync - * target device and in this case we ought to abort the resync as there - * is nothing that can be done to recover from this without operator - * intervention. If we don't set the B_ERROR flag we will continue - * reading from the mirror but won't write to the target (as it will - * have been placed into an errored state). - * To handle the case of multiple components within a submirror we only - * set the B_ERROR bit if explicitly requested to via MD_MPS_FLAG_ERROR. - * The originator of the resync read will cause this bit to be set if - * the underlying component count is one for a submirror resync. All - * other resync types will have the flag set as there is no underlying - * resync which can be performed on a contained metadevice for these - * resync types (optimized or component). - */ - - if (ps->ps_flags & MD_MPS_WRITE_AFTER_READ) { - if (ps->ps_flags & MD_MPS_FLAG_ERROR) - pb->b_flags |= B_ERROR; - md_kstat_done(ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - MPS_FREE(mirror_parent_cache, ps); - md_unit_readerexit(ui); - md_biodone(pb); - return; - } - /* - * Set the MD_IO_COUNTED flag as we are retrying the same I/O - * operation therefore this I/O request has already been counted, - * the I/O count variable will be decremented by mirror_done()'s - * call to md_biodone(). - */ - if (ps->ps_changecnt != un->un_changecnt) { - new_str_flags = MD_STR_NOTTOP | MD_IO_COUNTED; - if (ps->ps_flags & MD_MPS_WOW) - new_str_flags |= MD_STR_WOW; - if (ps->ps_flags & MD_MPS_MAPPED) - new_str_flags |= MD_STR_MAPPED; - /* - * If this I/O request was a read that was part of a resync, - * set MD_STR_WAR for the retried read to ensure that the - * resync write (i.e. write-after-read) will be performed - */ - if (ps->ps_flags & MD_MPS_RESYNC_READ) - new_str_flags |= MD_STR_WAR; - md_kstat_done(ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - MPS_FREE(mirror_parent_cache, ps); - md_unit_readerexit(ui); - (void) md_mirror_strategy(pb, new_str_flags, NULL); - return; - } - - pb->b_flags |= B_ERROR; - md_kstat_done(ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - MPS_FREE(mirror_parent_cache, ps); - md_unit_readerexit(ui); - md_biodone(pb); -} - -static void -error_update_unit(md_mps_t *ps) -{ - mm_unit_t *un; - mdi_unit_t *ui; - int smi; /* sub mirror index */ - int ci; /* errored component */ - set_t setno; - uint_t flags; /* for set_sm_comp_state() */ - uint_t hspflags; /* for check_comp_4_hotspares() */ - - ui = ps->ps_ui; - un = (mm_unit_t *)md_unit_writerlock(ui); - setno = MD_UN2SET(un); - - /* All of these updates have to propagated in case of MN set */ - flags = MD_STATE_XMIT; - hspflags = MD_HOTSPARE_XMIT; - - /* special treatment if we are called during updating watermarks */ - if (ps->ps_flags & MD_MPS_WMUPDATE) { - flags |= MD_STATE_WMUPDATE; - hspflags |= MD_HOTSPARE_WMUPDATE; - } - smi = 0; - ci = 0; - while (mirror_geterror(un, &smi, &ci, 1, 0) != 0) { - if (mirror_other_sources(un, smi, ci, 0) == 1) { - - /* Never called from ioctl context, so (IOLOCK *)NULL */ - set_sm_comp_state(un, smi, ci, CS_LAST_ERRED, 0, flags, - (IOLOCK *)NULL); - /* - * For a MN set, the NOTIFY is done when the state - * change is processed on each node - */ - if (!MD_MNSET_SETNO(MD_UN2SET(un))) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - continue; - } - /* Never called from ioctl context, so (IOLOCK *)NULL */ - set_sm_comp_state(un, smi, ci, CS_ERRED, 0, flags, - (IOLOCK *)NULL); - /* - * For a MN set, the NOTIFY is done when the state - * change is processed on each node - */ - if (!MD_MNSET_SETNO(MD_UN2SET(un))) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - smi = 0; - ci = 0; - } - - md_unit_writerexit(ui); - if (MD_MNSET_SETNO(setno)) { - send_poke_hotspares(setno); - } else { - (void) poke_hotspares(); - } - (void) md_unit_readerlock(ui); - - finish_error(ps); -} - -/* - * When we have a B_FAILFAST IO error on a Last Erred component we need to - * retry the IO without B_FAILFAST set so that we try to ensure that the - * component "sees" each IO. - */ -static void -last_err_retry(md_mcs_t *cs) -{ - struct buf *cb; - md_mps_t *ps; - uint_t flags; - - cb = &cs->cs_buf; - cb->b_flags &= ~B_FAILFAST; - - /* if we're panicing just let this I/O error out */ - if (panicstr) { - (void) mirror_done(cb); - return; - } - - /* reissue the I/O */ - - ps = cs->cs_ps; - - bioerror(cb, 0); - - mutex_enter(&ps->ps_mx); - - flags = MD_STR_NOTTOP; - if (ps->ps_flags & MD_MPS_MAPPED) - flags |= MD_STR_MAPPED; - if (ps->ps_flags & MD_MPS_NOBLOCK) - flags |= MD_NOBLOCK; - - mutex_exit(&ps->ps_mx); - - clear_retry_error(cb); - - cmn_err(CE_NOTE, "!md: %s: Last Erred, retry I/O without B_FAILFAST", - md_shortname(getminor(cb->b_edev))); - - md_call_strategy(cb, flags, NULL); -} - -static void -mirror_error(md_mps_t *ps) -{ - int smi; /* sub mirror index */ - int ci; /* errored component */ - - if (panicstr) { - finish_error(ps); - return; - } - - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - - smi = 0; - ci = 0; - if (mirror_geterror(ps->ps_un, &smi, &ci, 0, 0) != 0) { - md_unit_readerexit(ps->ps_ui); - daemon_request(&md_mstr_daemon, error_update_unit, - (daemon_queue_t *)ps, REQ_OLD); - return; - } - - finish_error(ps); -} - -static int -copy_write_done(struct buf *cb) -{ - md_mps_t *ps; - buf_t *pb; - char *wowbuf; - wowhdr_t *wowhdr; - ssize_t wow_resid; - - /* get wowbuf ans save structure */ - wowbuf = cb->b_un.b_addr; - wowhdr = WOWBUF_HDR(wowbuf); - ps = wowhdr->wow_ps; - pb = ps->ps_bp; - - /* Save error information, then free cb */ - if (cb->b_flags & B_ERROR) - pb->b_flags |= B_ERROR; - - if (cb->b_flags & B_REMAPPED) - bp_mapout(cb); - - freerbuf(cb); - - /* update residual and continue if needed */ - if ((pb->b_flags & B_ERROR) == 0) { - wow_resid = pb->b_bcount - wowhdr->wow_offset; - pb->b_resid = wow_resid; - if (wow_resid > 0) { - daemon_request(&md_mstr_daemon, copy_write_cont, - (daemon_queue_t *)wowhdr, REQ_OLD); - return (1); - } - } - - /* Write is complete, release resources. */ - kmem_cache_free(mirror_wowblk_cache, wowhdr); - ASSERT(!(ps->ps_flags & MD_MPS_ON_OVERLAP)); - md_kstat_done(ps->ps_ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - MPS_FREE(mirror_parent_cache, ps); - md_biodone(pb); - return (0); -} - -static void -copy_write_cont(wowhdr_t *wowhdr) -{ - buf_t *pb; - buf_t *cb; - char *wowbuf; - int wow_offset; - size_t wow_resid; - diskaddr_t wow_blkno; - - wowbuf = WOWHDR_BUF(wowhdr); - pb = wowhdr->wow_ps->ps_bp; - - /* get data on current location */ - wow_offset = wowhdr->wow_offset; - wow_resid = pb->b_bcount - wow_offset; - wow_blkno = pb->b_lblkno + lbtodb(wow_offset); - - /* setup child buffer */ - cb = getrbuf(KM_SLEEP); - cb->b_flags = B_WRITE; - cb->b_edev = pb->b_edev; - cb->b_un.b_addr = wowbuf; /* change to point at WOWBUF */ - cb->b_bufsize = md_wowbuf_size; /* change to wowbuf_size */ - cb->b_iodone = copy_write_done; - cb->b_bcount = MIN(md_wowbuf_size, wow_resid); - cb->b_lblkno = wow_blkno; - - /* move offset to next section */ - wowhdr->wow_offset += cb->b_bcount; - - /* copy and setup write for current section */ - bcopy(&pb->b_un.b_addr[wow_offset], wowbuf, cb->b_bcount); - - /* do it */ - /* - * Do not set the MD_IO_COUNTED flag as this is a new I/O request - * that handles the WOW condition. The resultant increment on the - * I/O count variable is cleared by copy_write_done()'s call to - * md_biodone(). - */ - (void) md_mirror_strategy(cb, MD_STR_NOTTOP | MD_STR_WOW - | MD_STR_MAPPED, NULL); -} - -static void -md_mirror_copy_write(md_mps_t *ps) -{ - wowhdr_t *wowhdr; - - wowhdr = kmem_cache_alloc(mirror_wowblk_cache, MD_ALLOCFLAGS); - mirror_wowblk_init(wowhdr); - wowhdr->wow_ps = ps; - wowhdr->wow_offset = 0; - copy_write_cont(wowhdr); -} - -static void -handle_wow(md_mps_t *ps) -{ - buf_t *pb; - - pb = ps->ps_bp; - - bp_mapin(pb); - - md_mirror_wow_cnt++; - if (!(pb->b_flags & B_PHYS) && (md_mirror_wow_flg & WOW_LOGIT)) { - cmn_err(CE_NOTE, - "md: %s, blk %lld, cnt %ld: Write on write %d occurred", - md_shortname(getminor(pb->b_edev)), - (longlong_t)pb->b_lblkno, pb->b_bcount, md_mirror_wow_cnt); - } - - /* - * Set the MD_IO_COUNTED flag as we are retrying the same I/O - * operation therefore this I/O request has already been counted, - * the I/O count variable will be decremented by mirror_done()'s - * call to md_biodone(). - */ - if (md_mirror_wow_flg & WOW_NOCOPY) - (void) md_mirror_strategy(pb, MD_STR_NOTTOP | MD_STR_WOW | - MD_STR_MAPPED | MD_IO_COUNTED, ps); - else - md_mirror_copy_write(ps); -} - -/* - * Return true if the specified submirror is either in the Last Erred - * state or is transitioning into the Last Erred state. - */ -static bool_t -submirror_is_lasterred(mm_unit_t *un, int smi) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int ci; - int compcnt; - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, un); - for (ci = 0; ci < compcnt; ci++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - - if (shared->ms_state == CS_LAST_ERRED) - return (B_TRUE); - - /* - * It is not currently Last Erred, check if entering Last Erred. - */ - if ((shared->ms_flags & MDM_S_IOERR) && - ((shared->ms_state == CS_OKAY) || - (shared->ms_state == CS_RESYNC))) { - if (mirror_other_sources(un, smi, ci, 0) == 1) - return (B_TRUE); - } - } - - return (B_FALSE); -} - - -static int -mirror_done(struct buf *cb) -{ - md_mps_t *ps; - md_mcs_t *cs; - - /*LINTED*/ - cs = (md_mcs_t *)((caddr_t)cb - md_mirror_mcs_buf_off); - ps = cs->cs_ps; - - mutex_enter(&ps->ps_mx); - - /* check if we need to retry an errored failfast I/O */ - if (cb->b_flags & B_ERROR) { - struct buf *pb = ps->ps_bp; - - if (cb->b_flags & B_FAILFAST) { - int i; - mm_unit_t *un = ps->ps_un; - - for (i = 0; i < NMIRROR; i++) { - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - - if (cb->b_edev == - md_dev64_to_dev(un->un_sm[i].sm_dev)) { - - /* - * This is the submirror that had the - * error. Check if it is Last Erred. - */ - if (submirror_is_lasterred(un, i)) { - daemon_queue_t *dqp; - - mutex_exit(&ps->ps_mx); - dqp = (daemon_queue_t *)cs; - dqp->dq_prev = NULL; - dqp->dq_next = NULL; - daemon_request(&md_done_daemon, - last_err_retry, dqp, - REQ_OLD); - return (1); - } - break; - } - } - } - - /* continue to process the buf without doing a retry */ - ps->ps_flags |= MD_MPS_ERROR; - pb->b_error = cb->b_error; - } - - return (mirror_done_common(cb)); -} - -/* - * Split from the original mirror_done function so we can handle bufs after a - * retry. - * ps->ps_mx is already held in the caller of this function and the cb error - * has already been checked and handled in the caller. - */ -static int -mirror_done_common(struct buf *cb) -{ - struct buf *pb; - mm_unit_t *un; - mdi_unit_t *ui; - md_mps_t *ps; - md_mcs_t *cs; - size_t end_rr, start_rr, current_rr; - - /*LINTED*/ - cs = (md_mcs_t *)((caddr_t)cb - md_mirror_mcs_buf_off); - ps = cs->cs_ps; - pb = ps->ps_bp; - - if (cb->b_flags & B_REMAPPED) - bp_mapout(cb); - - ps->ps_frags--; - if (ps->ps_frags != 0) { - mutex_exit(&ps->ps_mx); - kmem_cache_free(mirror_child_cache, cs); - return (1); - } - un = ps->ps_un; - ui = ps->ps_ui; - - /* - * Do not update outstanding_writes if we're running with ABR - * set for this mirror or the write() was issued with MD_STR_ABR set. - * Also a resync initiated write() has no outstanding_writes update - * either. - */ - if (((cb->b_flags & B_READ) == 0) && - (un->un_nsm >= 2) && - (ps->ps_call == NULL) && - !((ui->ui_tstate & MD_ABR_CAP) || (ps->ps_flags & MD_MPS_ABR)) && - !(ps->ps_flags & MD_MPS_WRITE_AFTER_READ)) { - BLK_TO_RR(end_rr, ps->ps_lastblk, un); - BLK_TO_RR(start_rr, ps->ps_firstblk, un); - mutex_enter(&un->un_resync_mx); - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) - un->un_outstanding_writes[current_rr]--; - mutex_exit(&un->un_resync_mx); - } - kmem_cache_free(mirror_child_cache, cs); - mutex_exit(&ps->ps_mx); - - if (ps->ps_call != NULL) { - daemon_request(&md_done_daemon, ps->ps_call, - (daemon_queue_t *)ps, REQ_OLD); - return (1); - } - - if ((ps->ps_flags & MD_MPS_ERROR)) { - daemon_request(&md_done_daemon, mirror_error, - (daemon_queue_t *)ps, REQ_OLD); - return (1); - } - - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - - /* - * Handle Write-on-Write problem. - * Skip In case of Raw and Direct I/O as they are - * handled earlier. - * - */ - if (!(md_mirror_wow_flg & WOW_DISABLE) && - !(pb->b_flags & B_READ) && - !(ps->ps_flags & MD_MPS_WOW) && - !(pb->b_flags & B_PHYS) && - any_pages_dirty(pb)) { - md_unit_readerexit(ps->ps_ui); - daemon_request(&md_mstr_daemon, handle_wow, - (daemon_queue_t *)ps, REQ_OLD); - return (1); - } - - md_kstat_done(ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - MPS_FREE(mirror_parent_cache, ps); - md_unit_readerexit(ui); - md_biodone(pb); - return (0); -} - -/* - * Clear error state in submirror component if the retry worked after - * a failfast error. - */ -static void -clear_retry_error(struct buf *cb) -{ - int smi; - md_mcs_t *cs; - mm_unit_t *un; - mdi_unit_t *ui_sm; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - u_longlong_t cnt; - md_m_shared_t *shared; - - /*LINTED*/ - cs = (md_mcs_t *)((caddr_t)cb - md_mirror_mcs_buf_off); - un = cs->cs_ps->ps_un; - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - - if (cb->b_edev == md_dev64_to_dev(un->un_sm[smi].sm_dev)) - break; - } - - if (smi >= NMIRROR) - return; - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - cnt = cb->b_bcount; - - ui_sm = MDI_UNIT(getminor(cb->b_edev)); - (void) md_unit_writerlock(ui_sm); - - shared = (md_m_shared_t *)(*(smic->sm_shared_by_blk))(sm->sm_dev, sm, - cb->b_blkno, &cnt); - - if (shared->ms_flags & MDM_S_IOERR) { - shared->ms_flags &= ~MDM_S_IOERR; - - } else { - /* the buf spans components and the first one is not erred */ - int cnt; - int i; - - cnt = (*(smic->sm_get_component_count))(sm->sm_dev, un); - for (i = 0; i < cnt; i++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, i); - - if (shared->ms_flags & MDM_S_IOERR && - shared->ms_state == CS_OKAY) { - - shared->ms_flags &= ~MDM_S_IOERR; - break; - } - } - } - - md_unit_writerexit(ui_sm); -} - -static size_t -mirror_map_read( - md_mps_t *ps, - md_mcs_t *cs, - diskaddr_t blkno, - u_longlong_t count -) -{ - mm_unit_t *un; - buf_t *bp; - u_longlong_t cando; - - bp = &cs->cs_buf; - un = ps->ps_un; - - bp->b_lblkno = blkno; - if (fast_select_read_unit(ps, cs) == 0) { - bp->b_bcount = ldbtob(count); - return (0); - } - bp->b_edev = md_dev64_to_dev(select_read_unit(un, blkno, - count, &cando, 0, NULL, cs)); - bp->b_bcount = ldbtob(cando); - if (count != cando) - return (cando); - return (0); -} - -static void -write_after_read(md_mps_t *ps) -{ - struct buf *pb; - int flags; - - if (ps->ps_flags & MD_MPS_ERROR) { - mirror_error(ps); - return; - } - - pb = ps->ps_bp; - md_kstat_done(ps->ps_ui, pb, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - ps->ps_call = NULL; - ps->ps_flags |= MD_MPS_WRITE_AFTER_READ; - flags = MD_STR_NOTTOP | MD_STR_WAR; - if (ps->ps_flags & MD_MPS_MAPPED) - flags |= MD_STR_MAPPED; - if (ps->ps_flags & MD_MPS_NOBLOCK) - flags |= MD_NOBLOCK; - if (ps->ps_flags & MD_MPS_DIRTY_RD) - flags |= MD_STR_DIRTY_RD; - (void) mirror_write_strategy(pb, flags, ps); -} - -static void -continue_serial(md_mps_t *ps) -{ - md_mcs_t *cs; - buf_t *cb; - mm_unit_t *un; - int flags; - - un = ps->ps_un; - cs = kmem_cache_alloc(mirror_child_cache, MD_ALLOCFLAGS); - mirror_child_init(cs); - cb = &cs->cs_buf; - ps->ps_call = NULL; - ps->ps_frags = 1; - (void) mirror_map_write(un, cs, ps, 0); - flags = MD_STR_NOTTOP; - if (ps->ps_flags & MD_MPS_MAPPED) - flags |= MD_STR_MAPPED; - md_call_strategy(cb, flags, NULL); -} - -static int -mirror_map_write(mm_unit_t *un, md_mcs_t *cs, md_mps_t *ps, int war) -{ - int i; - dev_t dev; /* needed for bioclone, so not md_dev64_t */ - buf_t *cb; - buf_t *pb; - diskaddr_t blkno; - size_t bcount; - off_t offset; - - pb = ps->ps_bp; - cb = &cs->cs_buf; - cs->cs_ps = ps; - - i = md_find_nth_unit(ps->ps_writable_sm, ps->ps_current_sm); - - dev = md_dev64_to_dev(un->un_sm[i].sm_dev); - - blkno = pb->b_lblkno; - bcount = pb->b_bcount; - offset = 0; - if (war && (blkno == 0) && (un->c.un_flag & MD_LABELED)) { - blkno = DK_LABEL_LOC + 1; - /* - * This handles the case where we're requesting - * a write to block 0 on a label partition - * and the request size was smaller than the - * size of the label. If this is the case - * then we'll return -1. Failure to do so will - * either cause the calling thread to hang due to - * an ssd bug, or worse if the bcount were allowed - * to go negative (ie large). - */ - if (bcount <= DEV_BSIZE*(DK_LABEL_LOC + 1)) - return (-1); - bcount -= (DEV_BSIZE*(DK_LABEL_LOC + 1)); - offset = (DEV_BSIZE*(DK_LABEL_LOC + 1)); - } - - cb = md_bioclone(pb, offset, bcount, dev, blkno, mirror_done, - cb, KM_NOSLEEP); - if (war) - cb->b_flags = (cb->b_flags & ~B_READ) | B_WRITE; - - /* - * If the submirror is in the erred stated, check if any component is - * in the Last Erred state. If so, we don't want to use the B_FAILFAST - * flag on the IO. - * - * Provide a fast path for the non-erred case (which should be the - * normal case). - */ - if (un->un_sm[i].sm_flags & MD_SM_FAILFAST) { - if (un->un_sm[i].sm_state & SMS_COMP_ERRED) { - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int ci; - int compcnt; - - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - compcnt = (*(smic->sm_get_component_count)) - (sm->sm_dev, un); - for (ci = 0; ci < compcnt; ci++) { - md_m_shared_t *shared; - - shared = (md_m_shared_t *) - (*(smic->sm_shared_by_indx))(sm->sm_dev, - sm, ci); - - if (shared->ms_state == CS_LAST_ERRED) - break; - } - if (ci >= compcnt) - cb->b_flags |= B_FAILFAST; - - } else { - cb->b_flags |= B_FAILFAST; - } - } - - ps->ps_current_sm++; - if (ps->ps_current_sm != ps->ps_active_cnt) { - if (un->un_write_option == WR_SERIAL) { - ps->ps_call = continue_serial; - return (0); - } - return (1); - } - return (0); -} - -/* - * directed_read_done: - * ------------------ - * Completion routine called when a DMR request has been returned from the - * underlying driver. Wake-up the original ioctl() and return the data to - * the user. - */ -static void -directed_read_done(md_mps_t *ps) -{ - mm_unit_t *un; - mdi_unit_t *ui; - - un = ps->ps_un; - ui = ps->ps_ui; - - md_unit_readerexit(ui); - md_kstat_done(ui, ps->ps_bp, (ps->ps_flags & MD_MPS_WRITE_AFTER_READ)); - ps->ps_call = NULL; - - mutex_enter(&un->un_dmr_mx); - cv_signal(&un->un_dmr_cv); - mutex_exit(&un->un_dmr_mx); - - /* release the parent structure */ - kmem_cache_free(mirror_parent_cache, ps); -} - -/* - * daemon_io: - * ------------ - * Called to issue a mirror_write_strategy() or mirror_read_strategy - * call from a blockable context. NOTE: no mutex can be held on entry to this - * routine - */ -static void -daemon_io(daemon_queue_t *dq) -{ - md_mps_t *ps = (md_mps_t *)dq; - int flag = MD_STR_NOTTOP; - buf_t *pb = ps->ps_bp; - - if (ps->ps_flags & MD_MPS_MAPPED) - flag |= MD_STR_MAPPED; - if (ps->ps_flags & MD_MPS_WOW) - flag |= MD_STR_WOW; - if (ps->ps_flags & MD_MPS_WRITE_AFTER_READ) - flag |= MD_STR_WAR; - if (ps->ps_flags & MD_MPS_ABR) - flag |= MD_STR_ABR; - if (ps->ps_flags & MD_MPS_BLOCKABLE_IO) - flag |= MD_STR_BLOCK_OK; - - /* - * If this is a resync read, ie MD_STR_DIRTY_RD not set, set - * MD_STR_WAR before calling mirror_read_strategy - */ - if (pb->b_flags & B_READ) { - if (!(ps->ps_flags & MD_MPS_DIRTY_RD)) - flag |= MD_STR_WAR; - mirror_read_strategy(pb, flag, ps); - } else - mirror_write_strategy(pb, flag, ps); -} - -/* - * update_resync: - * ------------- - * Called to update the in-core version of the resync record with the latest - * version that was committed to disk when the previous mirror owner - * relinquished ownership. This call is likely to block as we must hold-off - * any current resync processing that may be occurring. - * On completion of the resync record update we issue the mirror_write_strategy - * call to complete the i/o that first started this sequence. To remove a race - * condition between a new write() request which is submitted and the resync - * record update we acquire the writerlock. This will hold off all i/o to the - * mirror until the resync update has completed. - * NOTE: no mutex can be held on entry to this routine - */ -static void -update_resync(daemon_queue_t *dq) -{ - md_mps_t *ps = (md_mps_t *)dq; - buf_t *pb = ps->ps_bp; - mdi_unit_t *ui = ps->ps_ui; - mm_unit_t *un = MD_UNIT(ui->ui_link.ln_id); - set_t setno; - int restart_resync; - - mutex_enter(&un->un_rrp_inflight_mx); - (void) md_unit_writerlock(ui); - ps->ps_un = un; - setno = MD_MIN2SET(getminor(pb->b_edev)); - if (mddb_reread_rr(setno, un->un_rr_dirty_recid) == 0) { - /* - * Synchronize our in-core view of what regions need to be - * resync'd with the on-disk version. - */ - mirror_copy_rr(howmany(un->un_rrd_num, NBBY), un->un_resync_bm, - un->un_dirty_bm); - - /* Region dirty map is now up to date */ - } - restart_resync = (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) ? 1 : 0; - md_unit_writerexit(ui); - mutex_exit(&un->un_rrp_inflight_mx); - - /* Restart the resync thread if it was previously blocked */ - if (restart_resync) { - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER; - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - } - /* Continue with original deferred i/o */ - daemon_io(dq); -} - -/* - * owner_timeout: - * ------------- - * Called if the original mdmn_ksend_message() failed and the request is to be - * retried. Reattempt the original ownership change. - * - * NOTE: called at interrupt context (see timeout(9f)). - */ -static void -owner_timeout(void *arg) -{ - daemon_queue_t *dq = (daemon_queue_t *)arg; - - daemon_request(&md_mirror_daemon, become_owner, dq, REQ_OLD); -} - -/* - * become_owner: - * ------------ - * Called to issue RPC request to become the owner of the mirror - * associated with this i/o request. We assume that the ownership request - * is synchronous, so if it succeeds we will issue the request via - * mirror_write_strategy(). - * If multiple i/o's are outstanding we will be called from the mirror_daemon - * service thread. - * NOTE: no mutex should be held on entry to this routine. - */ -static void -become_owner(daemon_queue_t *dq) -{ - md_mps_t *ps = (md_mps_t *)dq; - mm_unit_t *un = ps->ps_un; - buf_t *pb = ps->ps_bp; - set_t setno; - md_mn_kresult_t *kres; - int msg_flags = md_mirror_msg_flags; - md_mps_t *ps1; - - ASSERT(dq->dq_next == NULL && dq->dq_prev == NULL); - - /* - * If we're already the mirror owner we do not need to send a message - * but can simply process the i/o request immediately. - * If we've already sent the request to become owner we requeue the - * request as we're waiting for the synchronous ownership message to - * be processed. - */ - if (MD_MN_MIRROR_OWNER(un)) { - /* - * As the strategy() call will potentially block we need to - * punt this to a separate thread and complete this request - * as quickly as possible. Note: if we're a read request - * this must be a resync, we cannot afford to be queued - * behind any intervening i/o requests. In this case we put the - * request on the md_mirror_rs_daemon queue. - */ - if (pb->b_flags & B_READ) { - daemon_request(&md_mirror_rs_daemon, daemon_io, dq, - REQ_OLD); - } else { - daemon_request(&md_mirror_io_daemon, daemon_io, dq, - REQ_OLD); - } - } else { - mutex_enter(&un->un_owner_mx); - if ((un->un_owner_state & MM_MN_OWNER_SENT) == 0) { - md_mn_req_owner_t *msg; - int rval = 0; - - /* - * Check to see that we haven't exceeded the maximum - * retry count. If we have we fail the i/o as the - * comms mechanism has become wedged beyond recovery. - */ - if (dq->qlen++ >= MD_OWNER_RETRIES) { - mutex_exit(&un->un_owner_mx); - cmn_err(CE_WARN, - "md_mirror: Request exhausted ownership " - "retry limit of %d attempts", dq->qlen); - pb->b_error = EIO; - pb->b_flags |= B_ERROR; - pb->b_resid = pb->b_bcount; - kmem_cache_free(mirror_parent_cache, ps); - md_biodone(pb); - return; - } - - /* - * Issue request to change ownership. The call is - * synchronous so when it returns we can complete the - * i/o (if successful), or enqueue it again so that - * the operation will be retried. - */ - un->un_owner_state |= MM_MN_OWNER_SENT; - mutex_exit(&un->un_owner_mx); - - msg = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP); - setno = MD_MIN2SET(getminor(pb->b_edev)); - msg->mnum = MD_SID(un); - msg->owner = md_mn_mynode_id; - msg_flags |= MD_MSGF_NO_LOG; - /* - * If this IO is triggered by updating a watermark, - * it might be issued by the creation of a softpartition - * while the commd subsystem is suspended. - * We don't want this message to block. - */ - if (ps->ps_flags & MD_MPS_WMUPDATE) { - msg_flags |= MD_MSGF_OVERRIDE_SUSPEND; - } - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - rval = mdmn_ksend_message(setno, - MD_MN_MSG_REQUIRE_OWNER, msg_flags, 0, - (char *)msg, sizeof (md_mn_req_owner_t), kres); - - kmem_free(msg, sizeof (md_mn_req_owner_t)); - - if (MDMN_KSEND_MSG_OK(rval, kres)) { - dq->qlen = 0; - /* - * Successfully changed owner, reread the - * resync record so that we have a valid idea of - * any previously committed incomplete write()s. - * NOTE: As we need to acquire the resync mutex - * this may block, so we defer it to a separate - * thread handler. This makes us (effectively) - * non-blocking once the ownership message - * handling has completed. - */ - mutex_enter(&un->un_owner_mx); - if (un->un_owner_state & MM_MN_BECOME_OWNER) { - un->un_mirror_owner = md_mn_mynode_id; - /* Sets owner of un_rr_dirty record */ - if (un->un_rr_dirty_recid) - (void) mddb_setowner( - un->un_rr_dirty_recid, - md_mn_mynode_id); - un->un_owner_state &= - ~MM_MN_BECOME_OWNER; - /* - * Release the block on the current - * resync region if it is blocked - */ - ps1 = un->un_rs_prev_overlap; - if ((ps1 != NULL) && - (ps1->ps_flags & MD_MPS_ON_OVERLAP)) - mirror_overlap_tree_remove(ps1); - mutex_exit(&un->un_owner_mx); - - /* - * If we're a read, this must be a - * resync request, issue - * the i/o request on the - * md_mirror_rs_daemon queue. This is - * to avoid a deadlock between the - * resync_unit thread and - * subsequent i/o requests that may - * block on the resync region. - */ - if (pb->b_flags & B_READ) { - daemon_request( - &md_mirror_rs_daemon, - update_resync, dq, REQ_OLD); - } else { - daemon_request( - &md_mirror_io_daemon, - update_resync, dq, REQ_OLD); - } - kmem_free(kres, - sizeof (md_mn_kresult_t)); - return; - } else { - /* - * Some other node has beaten us to - * obtain ownership. We need to - * reschedule our ownership request - */ - mutex_exit(&un->un_owner_mx); - } - } else { - mdmn_ksend_show_error(rval, kres, - "MD_MN_MSG_REQUIRE_OWNER"); - /* - * Message transport failure is handled by the - * comms layer. If the ownership change request - * does not succeed we need to flag the error to - * the initiator of the i/o. This is handled by - * the retry logic above. As the request failed - * we do not know _who_ the owner of the mirror - * currently is. We reset our idea of the owner - * to None so that any further write()s will - * attempt to become the owner again. This stops - * multiple nodes writing to the same mirror - * simultaneously. - */ - mutex_enter(&un->un_owner_mx); - un->un_owner_state &= - ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER); - un->un_mirror_owner = MD_MN_MIRROR_UNOWNED; - mutex_exit(&un->un_owner_mx); - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - } else - mutex_exit(&un->un_owner_mx); - - /* - * Re-enqueue this request on the deferred i/o list. Delay the - * request for md_mirror_owner_to usecs to stop thrashing. - */ - (void) timeout(owner_timeout, dq, - drv_usectohz(md_mirror_owner_to)); - } -} - -static void -mirror_write_strategy(buf_t *pb, int flag, void *private) -{ - md_mps_t *ps; - md_mcs_t *cs; - int more; - mm_unit_t *un; - mdi_unit_t *ui; - buf_t *cb; /* child buf pointer */ - set_t setno; - int rs_on_overlap = 0; - - ui = MDI_UNIT(getminor(pb->b_edev)); - un = (mm_unit_t *)MD_UNIT(getminor(pb->b_edev)); - - - md_kstat_waitq_enter(ui); - - /* - * If a state change is in progress for this mirror in a MN set, - * suspend all non-resync writes until the state change is complete. - * The objective of this suspend is to ensure that it is not - * possible for one node to read data from a submirror that another node - * has not written to because of the state change. Therefore we - * suspend all writes until the state change has been made. As it is - * not possible to read from the target of a resync, there is no need - * to suspend resync writes. - * Note that we only block here if the caller can handle a busy-wait. - * The MD_STR_BLOCK_OK flag is set for daemon_io originated i/o only. - */ - - if (!(flag & MD_STR_WAR)) { - if (flag & MD_STR_BLOCK_OK) { - mutex_enter(&un->un_suspend_wr_mx); - while (un->un_suspend_wr_flag) { - cv_wait(&un->un_suspend_wr_cv, - &un->un_suspend_wr_mx); - } - mutex_exit(&un->un_suspend_wr_mx); - } - (void) md_unit_readerlock(ui); - } - - if (!(flag & MD_STR_NOTTOP)) { - if (md_checkbuf(ui, (md_unit_t *)un, pb)) { - md_kstat_waitq_exit(ui); - return; - } - } - - setno = MD_MIN2SET(getminor(pb->b_edev)); - - /* If an ABR write has been requested, set MD_STR_ABR flag */ - if (MD_MNSET_SETNO(setno) && (pb->b_flags & B_ABRWRITE)) - flag |= MD_STR_ABR; - - if (private == NULL) { - ps = kmem_cache_alloc(mirror_parent_cache, MD_ALLOCFLAGS); - mirror_parent_init(ps); - } else { - ps = private; - private = NULL; - } - if (flag & MD_STR_MAPPED) - ps->ps_flags |= MD_MPS_MAPPED; - - if (flag & MD_STR_WOW) - ps->ps_flags |= MD_MPS_WOW; - - if (flag & MD_STR_ABR) - ps->ps_flags |= MD_MPS_ABR; - - if (flag & MD_STR_WMUPDATE) - ps->ps_flags |= MD_MPS_WMUPDATE; - - /* - * Save essential information from the original buffhdr - * in the md_save structure. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = pb; - ps->ps_addr = pb->b_un.b_addr; - ps->ps_firstblk = pb->b_lblkno; - ps->ps_lastblk = pb->b_lblkno + lbtodb(pb->b_bcount) - 1; - ps->ps_changecnt = un->un_changecnt; - - /* - * Check for suspended writes here. This is where we can defer the - * write request to the daemon_io queue which will then call us with - * the MD_STR_BLOCK_OK flag set and we'll busy-wait (if necessary) at - * the top of this routine. - */ - if (!(flag & MD_STR_WAR) && !(flag & MD_STR_BLOCK_OK)) { - mutex_enter(&un->un_suspend_wr_mx); - if (un->un_suspend_wr_flag) { - ps->ps_flags |= MD_MPS_BLOCKABLE_IO; - mutex_exit(&un->un_suspend_wr_mx); - md_unit_readerexit(ui); - daemon_request(&md_mirror_daemon, daemon_io, - (daemon_queue_t *)ps, REQ_OLD); - return; - } - mutex_exit(&un->un_suspend_wr_mx); - } - - /* - * If not MN owner and this is an ABR write, make sure the current - * resync region is in the overlaps tree - */ - mutex_enter(&un->un_owner_mx); - if (MD_MNSET_SETNO(setno) && (!(MD_MN_MIRROR_OWNER(un))) && - ((ui->ui_tstate & MD_ABR_CAP) || (flag & MD_STR_ABR))) { - md_mps_t *ps1; - /* Block the current resync region, if not already blocked */ - ps1 = un->un_rs_prev_overlap; - - if ((ps1 != NULL) && ((ps1->ps_firstblk != 0) || - (ps1->ps_lastblk != 0))) { - /* Drop locks to avoid deadlock */ - mutex_exit(&un->un_owner_mx); - md_unit_readerexit(ui); - wait_for_overlaps(ps1, MD_OVERLAP_ALLOW_REPEAT); - rs_on_overlap = 1; - (void) md_unit_readerlock(ui); - mutex_enter(&un->un_owner_mx); - /* - * Check to see if we have obtained ownership - * while waiting for overlaps. If we have, remove - * the resync_region entry from the overlap tree - */ - if (MD_MN_MIRROR_OWNER(un) && - (ps1->ps_flags & MD_MPS_ON_OVERLAP)) { - mirror_overlap_tree_remove(ps1); - rs_on_overlap = 0; - } - } - } - mutex_exit(&un->un_owner_mx); - - - /* - * following keep write after read from writing to the - * source in the case where it all came from one place - */ - if (flag & MD_STR_WAR) { - int abort_write = 0; - /* - * We are perfoming a write-after-read. This is either as a - * result of a resync read or as a result of a read in a - * dirty resync region when the optimized resync is not - * complete. If in a MN set and a resync generated i/o, - * if the current block is not in the current - * resync region terminate the write as another node must have - * completed this resync region - */ - if ((MD_MNSET_SETNO(MD_UN2SET(un))) && - (!(flag & MD_STR_DIRTY_RD))) { - if (!IN_RESYNC_REGION(un, ps)) - abort_write = 1; - } - if ((select_write_after_read_units(un, ps) == 0) || - (abort_write)) { -#ifdef DEBUG - if (mirror_debug_flag) - printf("Abort resync write on %x, block %lld\n", - MD_SID(un), ps->ps_firstblk); -#endif - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - kmem_cache_free(mirror_parent_cache, ps); - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - md_biodone(pb); - return; - } - } else { - select_write_units(un, ps); - - /* Drop readerlock to avoid deadlock */ - md_unit_readerexit(ui); - wait_for_overlaps(ps, MD_OVERLAP_NO_REPEAT); - un = md_unit_readerlock(ui); - /* - * For a MN set with an ABR write, if we are now the - * owner and we have a resync region in the overlap - * tree, remove the entry from overlaps and retry the write. - */ - - if (MD_MNSET_SETNO(setno) && - ((ui->ui_tstate & MD_ABR_CAP) || (flag & MD_STR_ABR))) { - mutex_enter(&un->un_owner_mx); - if (((MD_MN_MIRROR_OWNER(un))) && rs_on_overlap) { - mirror_overlap_tree_remove(ps); - md_kstat_waitq_exit(ui); - mutex_exit(&un->un_owner_mx); - md_unit_readerexit(ui); - daemon_request(&md_mirror_daemon, daemon_io, - (daemon_queue_t *)ps, REQ_OLD); - return; - } - mutex_exit(&un->un_owner_mx); - } - } - - /* - * For Multinode mirrors with no owner and a Resync Region (not ABR) - * we need to become the mirror owner before continuing with the - * write(). For ABR mirrors we check that we 'own' the resync if - * we're in write-after-read mode. We do this _after_ ensuring that - * there are no overlaps to ensure that once we know that we are - * the owner, the readerlock will not be released until the write is - * complete. As a change of ownership in a MN set requires the - * writerlock, this ensures that ownership cannot be changed until - * the write is complete. - */ - if (MD_MNSET_SETNO(setno) && (!((ui->ui_tstate & MD_ABR_CAP) || - (flag & MD_STR_ABR)) || (flag & MD_STR_WAR))) { - if (MD_MN_NO_MIRROR_OWNER(un)) { - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - md_kstat_waitq_exit(ui); - ASSERT(!(flag & MD_STR_WAR)); - md_unit_readerexit(ui); - daemon_request(&md_mirror_daemon, become_owner, - (daemon_queue_t *)ps, REQ_OLD); - return; - } - } - - /* - * Mark resync region if mirror has a Resync Region _and_ we are not - * a resync initiated write(). Don't mark region if we're flagged as - * an ABR write. - */ - if (!((ui->ui_tstate & MD_ABR_CAP) || (flag & MD_STR_ABR)) && - !(flag & MD_STR_WAR)) { - if (mirror_mark_resync_region(un, ps->ps_firstblk, - ps->ps_lastblk, md_mn_mynode_id)) { - pb->b_flags |= B_ERROR; - pb->b_resid = pb->b_bcount; - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - kmem_cache_free(mirror_parent_cache, ps); - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - md_biodone(pb); - return; - } - } - - ps->ps_childbflags = pb->b_flags | B_WRITE; - ps->ps_childbflags &= ~B_READ; - if (flag & MD_STR_MAPPED) - ps->ps_childbflags &= ~B_PAGEIO; - - if (!(flag & MD_STR_NOTTOP) && panicstr) - /* Disable WOW and don't free ps */ - ps->ps_flags |= (MD_MPS_WOW|MD_MPS_DONTFREE); - - md_kstat_waitq_to_runq(ui); - - /* - * Treat Raw and Direct I/O as Write-on-Write always - */ - - if (!(md_mirror_wow_flg & WOW_DISABLE) && - (md_mirror_wow_flg & WOW_PHYS_ENABLE) && - (pb->b_flags & B_PHYS) && - !(ps->ps_flags & MD_MPS_WOW)) { - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - md_unit_readerexit(ui); - daemon_request(&md_mstr_daemon, handle_wow, - (daemon_queue_t *)ps, REQ_OLD); - return; - } - - ps->ps_frags = 1; - do { - cs = kmem_cache_alloc(mirror_child_cache, MD_ALLOCFLAGS); - mirror_child_init(cs); - cb = &cs->cs_buf; - more = mirror_map_write(un, cs, ps, (flag & MD_STR_WAR)); - - /* - * This handles the case where we're requesting - * a write to block 0 on a label partition. (more < 0) - * means that the request size was smaller than the - * size of the label. If so this request is done. - */ - if (more < 0) { - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - md_kstat_runq_exit(ui); - kmem_cache_free(mirror_child_cache, cs); - kmem_cache_free(mirror_parent_cache, ps); - md_unit_readerexit(ui); - md_biodone(pb); - return; - } - if (more) { - mutex_enter(&ps->ps_mx); - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - } - md_call_strategy(cb, flag, private); - } while (more); - - if (!(flag & MD_STR_NOTTOP) && panicstr) { - while (!(ps->ps_flags & MD_MPS_DONE)) { - md_daemon(1, &md_done_daemon); - drv_usecwait(10); - } - kmem_cache_free(mirror_parent_cache, ps); - } -} - -static void -mirror_read_strategy(buf_t *pb, int flag, void *private) -{ - md_mps_t *ps; - md_mcs_t *cs; - size_t more; - mm_unit_t *un; - mdi_unit_t *ui; - size_t current_count; - diskaddr_t current_blkno; - off_t current_offset; - buf_t *cb; /* child buf pointer */ - set_t setno; - - ui = MDI_UNIT(getminor(pb->b_edev)); - - md_kstat_waitq_enter(ui); - - un = (mm_unit_t *)md_unit_readerlock(ui); - - if (!(flag & MD_STR_NOTTOP)) { - if (md_checkbuf(ui, (md_unit_t *)un, pb)) { - md_kstat_waitq_exit(ui); - return; - } - } - - if (private == NULL) { - ps = kmem_cache_alloc(mirror_parent_cache, MD_ALLOCFLAGS); - mirror_parent_init(ps); - } else { - ps = private; - private = NULL; - } - - if (flag & MD_STR_MAPPED) - ps->ps_flags |= MD_MPS_MAPPED; - if (flag & MD_NOBLOCK) - ps->ps_flags |= MD_MPS_NOBLOCK; - if (flag & MD_STR_WMUPDATE) - ps->ps_flags |= MD_MPS_WMUPDATE; - - /* - * Check to see if this is a DMR driven read. If so we need to use the - * specified side (in un->un_dmr_last_read) for the source of the data. - */ - if (flag & MD_STR_DMR) - ps->ps_flags |= MD_MPS_DMR; - - /* - * Save essential information from the original buffhdr - * in the md_save structure. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = pb; - ps->ps_addr = pb->b_un.b_addr; - ps->ps_firstblk = pb->b_lblkno; - ps->ps_lastblk = pb->b_lblkno + lbtodb(pb->b_bcount) - 1; - ps->ps_changecnt = un->un_changecnt; - - current_count = btodb(pb->b_bcount); - current_blkno = pb->b_lblkno; - current_offset = 0; - - /* - * If flag has MD_STR_WAR set this means that the read is issued by a - * resync thread which may or may not be an optimised resync. - * - * If MD_UN_OPT_NOT_DONE is set this means that the optimized resync - * code has not completed; either a resync has not started since snarf, - * or there is an optimized resync in progress. - * - * We need to generate a write after this read in the following two - * cases, - * - * 1. Any Resync-Generated read - * - * 2. Any read to a DIRTY REGION if there is an optimized resync - * pending or in progress. - * - * The write after read is done in these cases to ensure that all sides - * of the mirror are in sync with the read data and that it is not - * possible for an application to read the same block multiple times - * and get different data. - * - * This would be possible if the block was in a dirty region. - * - * If we're performing a directed read we don't write the data out as - * the application is responsible for restoring the mirror to a known - * state. - */ - if (((MD_STATUS(un) & MD_UN_OPT_NOT_DONE) || (flag & MD_STR_WAR)) && - !(flag & MD_STR_DMR)) { - size_t start_rr, i, end_rr; - int region_dirty = 1; - - /* - * We enter here under three circumstances, - * - * MD_UN_OPT_NOT_DONE MD_STR_WAR - * 0 1 - * 1 0 - * 1 1 - * - * To be optimal we only care to explicitly check for dirty - * regions in the second case since if MD_STR_WAR is set we - * always do the write after read. - */ - if (!(flag & MD_STR_WAR)) { - BLK_TO_RR(end_rr, ps->ps_lastblk, un); - BLK_TO_RR(start_rr, ps->ps_firstblk, un); - - for (i = start_rr; i <= end_rr; i++) - if ((region_dirty = IS_KEEPDIRTY(i, un)) != 0) - break; - } - - if ((region_dirty) && - !(md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)) { - ps->ps_call = write_after_read; - /* - * Mark this as a RESYNC_READ in ps_flags. - * This is used if the read fails during a - * resync of a 3-way mirror to ensure that - * the retried read to the remaining - * good submirror has MD_STR_WAR set. This - * is needed to ensure that the resync write - * (write-after-read) takes place. - */ - ps->ps_flags |= MD_MPS_RESYNC_READ; - - /* - * If MD_STR_FLAG_ERR is set in the flags we - * set MD_MPS_FLAG_ERROR so that an error on the resync - * write (issued by write_after_read) will be flagged - * to the biowait'ing resync thread. This allows us to - * avoid issuing further resync requests to a device - * that has had a write failure. - */ - if (flag & MD_STR_FLAG_ERR) - ps->ps_flags |= MD_MPS_FLAG_ERROR; - - setno = MD_UN2SET(un); - /* - * Drop the readerlock to avoid - * deadlock - */ - md_unit_readerexit(ui); - wait_for_overlaps(ps, MD_OVERLAP_NO_REPEAT); - un = md_unit_readerlock(ui); - /* - * Ensure that we are owner - */ - if (MD_MNSET_SETNO(setno)) { - /* - * For a non-resync read that requires a - * write-after-read to be done, set a flag - * in the parent structure, so that the - * write_strategy routine can omit the - * test that the write is still within the - * resync region - */ - if (!(flag & MD_STR_WAR)) - ps->ps_flags |= MD_MPS_DIRTY_RD; - - /* - * Before reading the buffer, see if - * there is an owner. - */ - if (MD_MN_NO_MIRROR_OWNER(un)) { - ps->ps_call = NULL; - mirror_overlap_tree_remove(ps); - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - daemon_request( - &md_mirror_daemon, - become_owner, - (daemon_queue_t *)ps, - REQ_OLD); - return; - } - /* - * For a resync read, check to see if I/O is - * outside of the current resync region, or - * the resync has finished. If so - * just terminate the I/O - */ - if ((flag & MD_STR_WAR) && - (!(un->c.un_status & MD_UN_WAR) || - (!IN_RESYNC_REGION(un, ps)))) { -#ifdef DEBUG - if (mirror_debug_flag) - printf("Abort resync read " - "%x: %lld\n", - MD_SID(un), - ps->ps_firstblk); -#endif - mirror_overlap_tree_remove(ps); - kmem_cache_free(mirror_parent_cache, - ps); - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - md_biodone(pb); - return; - } - } - } - } - - if (flag & MD_STR_DMR) { - ps->ps_call = directed_read_done; - } - - if (!(flag & MD_STR_NOTTOP) && panicstr) - ps->ps_flags |= MD_MPS_DONTFREE; - - md_kstat_waitq_to_runq(ui); - - ps->ps_frags++; - do { - cs = kmem_cache_alloc(mirror_child_cache, MD_ALLOCFLAGS); - mirror_child_init(cs); - cb = &cs->cs_buf; - cs->cs_ps = ps; - - cb = md_bioclone(pb, current_offset, current_count, NODEV, - current_blkno, mirror_done, cb, KM_NOSLEEP); - - more = mirror_map_read(ps, cs, current_blkno, - (u_longlong_t)current_count); - if (more) { - mutex_enter(&ps->ps_mx); - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - } - - /* - * Do these calculations now, - * so that we pickup a valid b_bcount from the chld_bp. - */ - current_count -= more; - current_offset += cb->b_bcount; - current_blkno += more; - md_call_strategy(cb, flag, private); - } while (more); - - if (!(flag & MD_STR_NOTTOP) && panicstr) { - while (!(ps->ps_flags & MD_MPS_DONE)) { - md_daemon(1, &md_done_daemon); - drv_usecwait(10); - } - kmem_cache_free(mirror_parent_cache, ps); - } -} - -void -md_mirror_strategy(buf_t *bp, int flag, void *private) -{ - set_t setno = MD_MIN2SET(getminor(bp->b_edev)); - - /* - * When doing IO to a multi owner meta device, check if set is halted. - * We do this check without the needed lock held, for performance - * reasons. - * If an IO just slips through while the set is locked via an - * MD_MN_SUSPEND_SET, we don't care about it. - * Only check for suspension if we are a top-level i/o request - * (MD_STR_NOTTOP is cleared in 'flag'). - */ - if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == - (MD_SET_HALTED | MD_SET_MNSET)) { - if ((flag & MD_STR_NOTTOP) == 0) { - mutex_enter(&md_mx); - /* Here we loop until the set is no longer halted */ - while (md_set[setno].s_status & MD_SET_HALTED) { - cv_wait(&md_cv, &md_mx); - } - mutex_exit(&md_mx); - } - } - - if ((flag & MD_IO_COUNTED) == 0) { - if ((flag & MD_NOBLOCK) == 0) { - if (md_inc_iocount(setno) != 0) { - bp->b_flags |= B_ERROR; - bp->b_error = ENXIO; - bp->b_resid = bp->b_bcount; - biodone(bp); - return; - } - } else { - md_inc_iocount_noblock(setno); - } - } - - if (bp->b_flags & B_READ) - mirror_read_strategy(bp, flag, private); - else - mirror_write_strategy(bp, flag, private); -} - -/* - * mirror_directed_read: - * -------------------- - * Entry-point for the DKIOCDMR ioctl. We issue a read to a specified sub-mirror - * so that the application can determine what (if any) resync needs to be - * performed. The data is copied out to the user-supplied buffer. - * - * Parameters: - * mdev - dev_t for the mirror device - * vdr - directed read parameters specifying location and submirror - * to perform the read from - * mode - used to ddi_copyout() any resulting data from the read - * - * Returns: - * 0 success - * !0 error code - * EINVAL - invalid request format - */ -int -mirror_directed_read(dev_t mdev, vol_directed_rd_t *vdr, int mode) -{ - buf_t *bp; - minor_t mnum = getminor(mdev); - mdi_unit_t *ui = MDI_UNIT(mnum); - mm_unit_t *un; - mm_submirror_t *sm; - char *sm_nm; - uint_t next_side; - void *kbuffer; - - if (ui == NULL) - return (ENXIO); - - if (!(vdr->vdr_flags & DKV_DMR_NEXT_SIDE)) { - return (EINVAL); - } - - /* Check for aligned block access. We disallow non-aligned requests. */ - if (vdr->vdr_offset % DEV_BSIZE) { - return (EINVAL); - } - - /* - * Allocate kernel buffer for target of read(). If we had a reliable - * (sorry functional) DDI this wouldn't be needed. - */ - kbuffer = kmem_alloc(vdr->vdr_nbytes, KM_NOSLEEP); - if (kbuffer == NULL) { - cmn_err(CE_WARN, "mirror_directed_read: couldn't allocate %lx" - " bytes\n", vdr->vdr_nbytes); - return (ENOMEM); - } - - bp = getrbuf(KM_SLEEP); - - bp->b_un.b_addr = kbuffer; - bp->b_flags = B_READ; - bp->b_bcount = vdr->vdr_nbytes; - bp->b_lblkno = lbtodb(vdr->vdr_offset); - bp->b_edev = mdev; - - un = md_unit_readerlock(ui); - - /* - * If DKV_SIDE_INIT is set we need to determine the first available - * side to start reading from. If it isn't set we increment to the - * next readable submirror. - * If there are no readable submirrors we error out with DKV_DMR_ERROR. - * Note: we check for a readable submirror on completion of the i/o so - * we should _always_ have one available. If this becomes unavailable - * we have missed the 'DKV_DMR_DONE' opportunity. This could happen if - * a metadetach is made between the completion of one DKIOCDMR ioctl - * and the start of the next (i.e. a sys-admin 'accident' occurred). - * The chance of this is small, but not non-existent. - */ - if (vdr->vdr_side == DKV_SIDE_INIT) { - next_side = 0; - } else { - next_side = vdr->vdr_side + 1; - } - while ((next_side < NMIRROR) && - !SUBMIRROR_IS_READABLE(un, next_side)) - next_side++; - if (next_side >= NMIRROR) { - vdr->vdr_flags |= DKV_DMR_ERROR; - freerbuf(bp); - vdr->vdr_bytesread = 0; - md_unit_readerexit(ui); - return (0); - } - - /* Set the side to read from */ - un->un_dmr_last_read = next_side; - - md_unit_readerexit(ui); - - /* - * Save timestamp for verification purposes. Can be read by debugger - * to verify that this ioctl has been executed and to find the number - * of DMR reads and the time of the last DMR read. - */ - uniqtime(&mirror_dmr_stats.dmr_timestamp); - mirror_dmr_stats.dmr_count++; - - /* Issue READ request and wait for completion */ - mirror_read_strategy(bp, MD_STR_DMR|MD_NOBLOCK|MD_STR_NOTTOP, NULL); - - mutex_enter(&un->un_dmr_mx); - cv_wait(&un->un_dmr_cv, &un->un_dmr_mx); - mutex_exit(&un->un_dmr_mx); - - /* - * Check to see if we encountered an error during the read. If so we - * can make no guarantee about any possibly returned data. - */ - if ((bp->b_flags & B_ERROR) == 0) { - vdr->vdr_flags &= ~DKV_DMR_ERROR; - if (bp->b_resid) { - vdr->vdr_flags |= DKV_DMR_SHORT; - vdr->vdr_bytesread = vdr->vdr_nbytes - bp->b_resid; - } else { - vdr->vdr_flags |= DKV_DMR_SUCCESS; - vdr->vdr_bytesread = vdr->vdr_nbytes; - } - /* Copy the data read back out to the user supplied buffer */ - if (ddi_copyout(kbuffer, vdr->vdr_data, vdr->vdr_bytesread, - mode)) { - kmem_free(kbuffer, vdr->vdr_nbytes); - return (EFAULT); - } - - } else { - /* Error out with DKV_DMR_ERROR */ - vdr->vdr_flags |= DKV_DMR_ERROR; - vdr->vdr_flags &= ~(DKV_DMR_SUCCESS|DKV_DMR_SHORT|DKV_DMR_DONE); - } - /* - * Update the DMR parameters with the side and name of submirror that - * we have just read from (un->un_dmr_last_read) - */ - un = md_unit_readerlock(ui); - - vdr->vdr_side = un->un_dmr_last_read; - sm = &un->un_sm[un->un_dmr_last_read]; - sm_nm = md_shortname(md_getminor(sm->sm_dev)); - - (void) strncpy(vdr->vdr_side_name, sm_nm, sizeof (vdr->vdr_side_name)); - - /* - * Determine if we've completed the read cycle. This is true iff the - * next computed submirror (side) equals or exceeds NMIRROR. We cannot - * use un_nsm as we need to handle a sparse array of submirrors (which - * can occur if a submirror is metadetached). - */ - next_side = un->un_dmr_last_read + 1; - while ((next_side < NMIRROR) && - !SUBMIRROR_IS_READABLE(un, next_side)) - next_side++; - if (next_side >= NMIRROR) { - /* We've finished */ - vdr->vdr_flags |= DKV_DMR_DONE; - } - - md_unit_readerexit(ui); - freerbuf(bp); - kmem_free(kbuffer, vdr->vdr_nbytes); - - return (0); -} - -/* - * mirror_resync_message: - * --------------------- - * Handle the multi-node resync messages that keep all nodes within a given - * disk-set in sync with their view of a mirror's resync status. - * - * The message types dealt with are: - * MD_MN_MSG_RESYNC_STARTING - start a resync thread for a unit - * MD_MN_MSG_RESYNC_NEXT - specified next region to be resynced - * MD_MN_MSG_RESYNC_FINISH - stop the resync thread for a unit - * MD_MN_MSG_RESYNC_PHASE_DONE - end of a resync phase, opt, submirror or comp - * - * Returns: - * 0 Success - * >0 Failure error number - */ -int -mirror_resync_message(md_mn_rs_params_t *p, IOLOCK *lockp) -{ - mdi_unit_t *ui; - mm_unit_t *un; - set_t setno; - int is_ABR; - int smi; - int ci; - sm_state_t state; - int broke_out; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - md_error_t mde = mdnullerror; - md_mps_t *ps; - int rs_active; - int rr, rr_start, rr_end; - - /* Check that the given device is part of a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (setno >= md_nsets) { - return (ENXIO); - } - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL) - return (EINVAL); - if ((ui = MDI_UNIT(p->mnum)) == NULL) - return (EINVAL); - is_ABR = (ui->ui_tstate & MD_ABR_CAP); - - /* Obtain the current resync status */ - (void) md_ioctl_readerlock(lockp, ui); - rs_active = (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ? 1 : 0; - md_ioctl_readerexit(lockp); - - switch ((md_mn_msgtype_t)p->msg_type) { - case MD_MN_MSG_RESYNC_STARTING: - /* Start the resync thread for the mirror */ - (void) mirror_resync_unit(p->mnum, NULL, &p->mde, lockp); - break; - - case MD_MN_MSG_RESYNC_NEXT: - /* - * We have to release any previously marked overlap regions - * so that i/o can resume. Then we need to block the region - * from [rs_start..rs_start+rs_size) * so that no i/o is issued. - * Update un_rs_resync_done and un_rs_resync_2_do. - */ - (void) md_ioctl_readerlock(lockp, ui); - /* - * Ignore the message if there is no active resync thread or - * if it is for a resync type that we have already completed. - * un_resync_completed is set to the last resync completed - * when processing a PHASE_DONE message. - */ - if (!rs_active || (p->rs_type == un->un_resync_completed)) - break; - /* - * If this message is for the same resync and is for an earlier - * resync region, just ignore it. This can only occur if this - * node has progressed on to the next resync region before - * we receive this message. This can occur if the class for - * this message is busy and the originator has to retry thus - * allowing this node to move onto the next resync_region. - */ - if ((p->rs_type == un->un_rs_type) && - (p->rs_start < un->un_resync_startbl)) - break; - ps = un->un_rs_prev_overlap; - - /* Allocate previous overlap reference if needed */ - if (ps == NULL) { - ps = kmem_cache_alloc(mirror_parent_cache, - MD_ALLOCFLAGS); - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_firstblk = 0; - ps->ps_lastblk = 0; - ps->ps_flags = 0; - md_ioctl_readerexit(lockp); - (void) md_ioctl_writerlock(lockp, ui); - un->un_rs_prev_overlap = ps; - md_ioctl_writerexit(lockp); - } else - md_ioctl_readerexit(lockp); - - if (p->rs_originator != md_mn_mynode_id) { - /* - * Clear our un_resync_bm for the regions completed. - * The owner (originator) will take care of itself. - */ - BLK_TO_RR(rr_end, ps->ps_lastblk, un); - BLK_TO_RR(rr_start, p->rs_start, un); - if (ps->ps_lastblk && rr_end < rr_start) { - BLK_TO_RR(rr_start, ps->ps_firstblk, un); - mutex_enter(&un->un_resync_mx); - /* - * Update our resync bitmap to reflect that - * another node has synchronized this range. - */ - for (rr = rr_start; rr <= rr_end; rr++) { - CLR_KEEPDIRTY(rr, un); - } - mutex_exit(&un->un_resync_mx); - } - - /* - * On all but the originating node, first update - * the resync state, then unblock the previous - * region and block the next one. No need - * to do this if the region is already blocked. - * Update the submirror state and flags from the - * originator. This keeps the cluster in sync with - * regards to the resync status. - */ - - (void) md_ioctl_writerlock(lockp, ui); - un->un_rs_resync_done = p->rs_done; - un->un_rs_resync_2_do = p->rs_2_do; - un->un_rs_type = p->rs_type; - un->un_resync_startbl = p->rs_start; - md_ioctl_writerexit(lockp); - /* - * Use un_owner_mx to ensure that an ownership change - * cannot happen at the same time as this message - */ - mutex_enter(&un->un_owner_mx); - if (MD_MN_MIRROR_OWNER(un)) { - ps->ps_firstblk = p->rs_start; - ps->ps_lastblk = ps->ps_firstblk + - p->rs_size - 1; - } else { - if ((ps->ps_firstblk != p->rs_start) || - (ps->ps_lastblk != p->rs_start + - p->rs_size - 1)) { - /* Remove previous overlap range */ - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - - ps->ps_firstblk = p->rs_start; - ps->ps_lastblk = ps->ps_firstblk + - p->rs_size - 1; - - mutex_exit(&un->un_owner_mx); - /* Block this range from all i/o. */ - if (ps->ps_firstblk != 0 || - ps->ps_lastblk != 0) - wait_for_overlaps(ps, - MD_OVERLAP_ALLOW_REPEAT); - mutex_enter(&un->un_owner_mx); - /* - * Check to see if we have obtained - * ownership while waiting for - * overlaps. If we have, remove - * the resync_region entry from the - * overlap tree - */ - if (MD_MN_MIRROR_OWNER(un) && - (ps->ps_flags & MD_MPS_ON_OVERLAP)) - mirror_overlap_tree_remove(ps); - } - } - mutex_exit(&un->un_owner_mx); - - /* - * If this is the first RESYNC_NEXT message (i.e. - * MD_MN_RS_FIRST_RESYNC_NEXT set in p->rs_flags), - * issue RESYNC_START NOTIFY event - */ - if (p->rs_flags & MD_MN_RS_FIRST_RESYNC_NEXT) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, - SVM_TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - } - - /* Ensure that our local resync thread is running */ - if (un->un_rs_thread == NULL) { - (void) mirror_resync_unit(p->mnum, NULL, - &p->mde, lockp); - } - } - - break; - case MD_MN_MSG_RESYNC_FINISH: - /* - * Complete the resync by stopping the resync thread. - * Also release the previous overlap region field. - * Update the resync_progress_thread by cv_signal'ing it so - * that we mark the end of the resync as soon as possible. This - * stops an unnecessary delay should be panic after resync - * completion. - */ -#ifdef DEBUG - if (!rs_active) { - if (mirror_debug_flag) - printf("RESYNC_FINISH (mnum = %x), " - "Resync *NOT* active", - p->mnum); - } -#endif - - if ((un->c.un_status & MD_UN_RESYNC_ACTIVE) && - (p->rs_originator != md_mn_mynode_id)) { - mutex_enter(&un->un_rs_thread_mx); - un->c.un_status &= ~MD_UN_RESYNC_CANCEL; - un->un_rs_thread_flags |= MD_RI_SHUTDOWN; - un->un_rs_thread_flags &= - ~(MD_RI_BLOCK|MD_RI_BLOCK_OWNER); - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - } - if (is_ABR) { - /* Resync finished, if ABR set owner to NULL */ - mutex_enter(&un->un_owner_mx); - un->un_mirror_owner = 0; - mutex_exit(&un->un_owner_mx); - } - (void) md_ioctl_writerlock(lockp, ui); - ps = un->un_rs_prev_overlap; - if (ps != NULL) { - /* Remove previous overlap range */ - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - /* - * Release the overlap range reference - */ - un->un_rs_prev_overlap = NULL; - kmem_cache_free(mirror_parent_cache, - ps); - } - md_ioctl_writerexit(lockp); - - /* Mark the resync as complete in the metadb */ - un->un_rs_resync_done = p->rs_done; - un->un_rs_resync_2_do = p->rs_2_do; - un->un_rs_type = p->rs_type; - mutex_enter(&un->un_rs_progress_mx); - cv_signal(&un->un_rs_progress_cv); - mutex_exit(&un->un_rs_progress_mx); - - un = md_ioctl_writerlock(lockp, ui); - un->c.un_status &= ~MD_UN_RESYNC_ACTIVE; - /* Deal with any pending grow_unit */ - if (un->c.un_status & MD_UN_GROW_PENDING) { - if ((mirror_grow_unit(un, &mde) != 0) || - (! mdismderror(&mde, MDE_GROW_DELAYED))) { - un->c.un_status &= ~MD_UN_GROW_PENDING; - } - } - md_ioctl_writerexit(lockp); - break; - - case MD_MN_MSG_RESYNC_PHASE_DONE: - /* - * A phase of the resync, optimized. component or - * submirror is complete. Update mirror status. - * If the flag CLEAR_OPT_NOT_DONE is set, it means that the - * mirror owner is peforming a resync. If we have just snarfed - * this set, then we must clear any of the flags set at snarf - * time by unit_setup_resync(). - * Note that unit_setup_resync() sets up these flags to - * indicate that an optimized resync is required. These flags - * need to be reset because if we get here, the mirror owner - * will have handled the optimized resync. - * The flags that must be cleared are MD_UN_OPT_NOT_DONE and - * MD_UN_WAR. In addition, for each submirror, - * MD_SM_RESYNC_TARGET must be cleared and SMS_OFFLINE_RESYNC - * set to SMS_OFFLINE. - */ -#ifdef DEBUG - if (mirror_debug_flag) - printf("phase done mess received from %d, mnum=%x," - "type=%x, flags=%x\n", p->rs_originator, p->mnum, - p->rs_type, p->rs_flags); -#endif - /* - * Ignore the message if there is no active resync thread. - */ - if (!rs_active) - break; - - broke_out = p->rs_flags & MD_MN_RS_ERR; - switch (RS_TYPE(p->rs_type)) { - case MD_RS_OPTIMIZED: - un = md_ioctl_writerlock(lockp, ui); - if (p->rs_flags & MD_MN_RS_CLEAR_OPT_NOT_DONE) { - /* If we are originator, just clear rs_type */ - if (p->rs_originator == md_mn_mynode_id) { - SET_RS_TYPE_NONE(un->un_rs_type); - md_ioctl_writerexit(lockp); - break; - } - /* - * If CLEAR_OPT_NOT_DONE is set, only clear the - * flags if OPT_NOT_DONE is set *and* rs_type - * is MD_RS_NONE. - */ - if ((un->c.un_status & MD_UN_OPT_NOT_DONE) && - (RS_TYPE(un->un_rs_type) == MD_RS_NONE)) { - /* No resync in progress */ - un->c.un_status &= ~MD_UN_OPT_NOT_DONE; - un->c.un_status &= ~MD_UN_WAR; - } else { - /* - * We are in the middle of an - * optimized resync and this message - * should be ignored. - */ - md_ioctl_writerexit(lockp); - break; - } - } else { - /* - * This is the end of an optimized resync, - * clear the OPT_NOT_DONE and OFFLINE_SM flags - */ - - un->c.un_status &= ~MD_UN_KEEP_DIRTY; - if (!broke_out) - un->c.un_status &= ~MD_UN_WAR; - - /* - * Clear our un_resync_bm for the regions - * completed. The owner (originator) will - * take care of itself. - */ - if (p->rs_originator != md_mn_mynode_id && - (ps = un->un_rs_prev_overlap) != NULL) { - BLK_TO_RR(rr_start, ps->ps_firstblk, - un); - BLK_TO_RR(rr_end, ps->ps_lastblk, un); - mutex_enter(&un->un_resync_mx); - for (rr = rr_start; rr <= rr_end; - rr++) { - CLR_KEEPDIRTY(rr, un); - } - mutex_exit(&un->un_resync_mx); - } - } - - /* - * Set resync_completed to last resync type and then - * clear resync_type to indicate no resync in progress - */ - un->un_resync_completed = un->un_rs_type; - SET_RS_TYPE_NONE(un->un_rs_type); - - /* - * If resync is as a result of a submirror ONLINE, - * reset the submirror state to SMS_RUNNING if the - * resync was ok else set back to SMS_OFFLINE. - */ - for (smi = 0; smi < NMIRROR; smi++) { - un->un_sm[smi].sm_flags &= - ~MD_SM_RESYNC_TARGET; - if (SMS_BY_INDEX_IS(un, smi, - SMS_OFFLINE_RESYNC)) { - if (p->rs_flags & - MD_MN_RS_CLEAR_OPT_NOT_DONE) { - state = SMS_OFFLINE; - } else { - state = (broke_out ? - SMS_OFFLINE : SMS_RUNNING); - } - mirror_set_sm_state( - &un->un_sm[smi], - &un->un_smic[smi], state, - broke_out); - mirror_commit(un, NO_SUBMIRRORS, - 0); - } - /* - * If we still have an offline submirror, reset - * the OFFLINE_SM flag in the mirror status - */ - if (SMS_BY_INDEX_IS(un, smi, - SMS_OFFLINE)) - un->c.un_status |= - MD_UN_OFFLINE_SM; - } - md_ioctl_writerexit(lockp); - break; - case MD_RS_SUBMIRROR: - un = md_ioctl_writerlock(lockp, ui); - smi = RS_SMI(p->rs_type); - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - /* Clear RESYNC target */ - un->un_sm[smi].sm_flags &= ~MD_SM_RESYNC_TARGET; - /* - * Set resync_completed to last resync type and then - * clear resync_type to indicate no resync in progress - */ - un->un_resync_completed = un->un_rs_type; - SET_RS_TYPE_NONE(un->un_rs_type); - /* - * If the resync completed ok reset the submirror - * state to SMS_RUNNING else reset it to SMS_ATTACHED - */ - state = (broke_out ? - SMS_ATTACHED : SMS_RUNNING); - mirror_set_sm_state(sm, smic, state, broke_out); - un->c.un_status &= ~MD_UN_WAR; - mirror_commit(un, SMI2BIT(smi), 0); - md_ioctl_writerexit(lockp); - break; - case MD_RS_COMPONENT: - un = md_ioctl_writerlock(lockp, ui); - smi = RS_SMI(p->rs_type); - ci = RS_CI(p->rs_type); - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - shared = (md_m_shared_t *) - (*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - un->c.un_status &= ~MD_UN_WAR; - /* Clear RESYNC target */ - un->un_sm[smi].sm_flags &= ~MD_SM_RESYNC_TARGET; - /* - * Set resync_completed to last resync type and then - * clear resync_type to indicate no resync in progress - */ - un->un_resync_completed = un->un_rs_type; - SET_RS_TYPE_NONE(un->un_rs_type); - - /* - * If the resync completed ok, set the component state - * to CS_OKAY. - */ - if (broke_out) - shared->ms_flags |= MDM_S_RS_TRIED; - else { - /* - * As we don't transmit the changes, - * no need to drop the lock. - */ - set_sm_comp_state(un, smi, ci, CS_OKAY, 0, - MD_STATE_NO_XMIT, (IOLOCK *)NULL); - } - md_ioctl_writerexit(lockp); - default: - break; - } - /* - * If the purpose of this PHASE_DONE message is just to - * indicate to all other nodes that the optimized resync - * required (OPT_NOT_DONE) flag is to be cleared, there is - * no need to generate a notify event as there has not - * actually been a resync. - */ - if (!(p->rs_flags & MD_MN_RS_CLEAR_OPT_NOT_DONE)) { - if (broke_out) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE, - SVM_TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - } - } - break; - - default: -#ifdef DEBUG - cmn_err(CE_PANIC, "mirror_resync_message: Unknown message type" - " %x\n", p->msg_type); -#endif - return (EINVAL); - } - return (0); -} - -/* Return a -1 if snarf of optimized record failed and set should be released */ -static int -mirror_snarf(md_snarfcmd_t cmd, set_t setno) -{ - mddb_recid_t recid; - int gotsomething; - int all_mirrors_gotten; - mm_unit_t *un; - mddb_type_t typ1; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - size_t newreqsize; - mm_unit_t *big_un; - mm_unit32_od_t *small_un; - int retval; - mdi_unit_t *ui; - - if (cmd == MD_SNARF_CLEANUP) { - if (md_get_setstatus(setno) & MD_SET_STALE) - return (0); - - recid = mddb_makerecid(setno, 0); - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - while ((recid = mddb_getnextrec(recid, typ1, MIRROR_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { - un = (mm_unit_t *)mddb_getrecaddr(recid); - mirror_cleanup(un); - recid = mddb_makerecid(setno, 0); - } - } - return (0); - } - - all_mirrors_gotten = 1; - gotsomething = 0; - - recid = mddb_makerecid(setno, 0); - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - - while ((recid = mddb_getnextrec(recid, typ1, MIRROR_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_MIRROR; - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - /* - * This means, we have an old and small - * record and this record hasn't already - * been converted. Before we create an - * incore metadevice from this we have to - * convert it to a big record. - */ - small_un = - (mm_unit32_od_t *)mddb_getrecaddr(recid); - newreqsize = sizeof (mm_unit_t); - big_un = (mm_unit_t *)kmem_zalloc(newreqsize, - KM_SLEEP); - mirror_convert((caddr_t)small_un, - (caddr_t)big_un, SMALL_2_BIG); - kmem_free(small_un, dep->de_reqsize); - - /* - * Update userdata and incore userdata - * incores are at the end of un - */ - dep->de_rb_userdata_ic = big_un; - dep->de_rb_userdata = big_un; - dep->de_icreqsize = newreqsize; - un = big_un; - rbp->rb_private |= MD_PRV_CONVD; - } else { - /* - * Unit already converted, just get the - * record address. - */ - un = (mm_unit_t *)mddb_getrecaddr_resize(recid, - sizeof (*un), 0); - } - un->c.un_revision &= ~MD_64BIT_META_DEV; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - /* Big device */ - un = (mm_unit_t *)mddb_getrecaddr_resize(recid, - sizeof (*un), 0); - un->c.un_revision |= MD_64BIT_META_DEV; - un->c.un_flag |= MD_EFILABEL; - break; - } - MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); - - /* - * Create minor device node for snarfed entry. - */ - (void) md_create_minor_node(setno, MD_SID(un)); - - if (MD_UNIT(MD_SID(un)) != NULL) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - all_mirrors_gotten = 0; - retval = mirror_build_incore(un, 1); - if (retval == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0); - resync_start_timeout(setno); - gotsomething = 1; - } else { - return (retval); - } - /* - * Set flag to indicate that the mirror has not yet - * been through a reconfig. This flag is used for MN sets - * when determining whether to update the mirror state from - * the Master node. - */ - if (MD_MNSET_SETNO(setno)) { - ui = MDI_UNIT(MD_SID(un)); - ui->ui_tstate |= MD_RESYNC_NOT_DONE; - } - } - - if (!all_mirrors_gotten) - return (gotsomething); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, RESYNC_REC)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - return (0); -} - -static int -mirror_halt(md_haltcmd_t cmd, set_t setno) -{ - unit_t i; - mdi_unit_t *ui; - minor_t mnum; - int reset_mirror_flag = 0; - - if (cmd == MD_HALT_CLOSE) - return (0); - - if (cmd == MD_HALT_OPEN) - return (0); - - if (cmd == MD_HALT_UNLOAD) - return (0); - - if (cmd == MD_HALT_CHECK) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != mirror_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) - return (1); - } - return (0); - } - - if (cmd != MD_HALT_DOIT) - return (1); - - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != mirror_md_ops.md_selfindex) - continue; - reset_mirror((mm_unit_t *)MD_UNIT(mnum), mnum, 0); - - /* Set a flag if there is at least one mirror metadevice. */ - reset_mirror_flag = 1; - } - - /* - * Only wait for the global dr_timeout to finish - * - if there are mirror metadevices in this diskset or - * - if this is the local set since an unload of the md_mirror - * driver could follow a successful mirror halt in the local set. - */ - if ((reset_mirror_flag != 0) || (setno == MD_LOCAL_SET)) { - while ((mirror_md_ops.md_head == NULL) && - (mirror_timeout.dr_timeout_id != 0)) - delay(md_hz); - } - - return (0); -} - -/*ARGSUSED3*/ -static int -mirror_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) -{ - IOLOCK lock; - minor_t mnum = getminor(*dev); - set_t setno; - - /* - * When doing an open of a multi owner metadevice, check to see if this - * node is a starting node and if a reconfig cycle is underway. - * If so, the system isn't sufficiently set up enough to handle the - * open (which involves I/O during sp_validate), so fail with ENXIO. - */ - setno = MD_MIN2SET(mnum); - if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == - (MD_SET_MNSET | MD_SET_MN_START_RC)) { - return (ENXIO); - } - - if (md_oflags & MD_OFLG_FROMIOCTL) { - /* - * This indicates that the caller is an ioctl service routine. - * In this case we initialise our stack-based IOLOCK and pass - * this into the internal open routine. This allows multi-owner - * metadevices to avoid deadlocking if an error is encountered - * during the open() attempt. The failure case is: - * s-p -> mirror -> s-p (with error). Attempting to metaclear - * this configuration would deadlock as the mirror code has to - * send a state-update to the other nodes when it detects the - * failure of the underlying submirror with an errored soft-part - * on it. As there is a class1 message in progress (metaclear) - * set_sm_comp_state() cannot send another class1 message; - * instead we do not send a state_update message as the - * metaclear is distributed and the failed submirror will be - * cleared from the configuration by the metaclear. - */ - IOLOCK_INIT(&lock); - return (mirror_internal_open(getminor(*dev), flag, otyp, - md_oflags, &lock)); - } else { - return (mirror_internal_open(getminor(*dev), flag, otyp, - md_oflags, (IOLOCK *)NULL)); - } -} - - -/*ARGSUSED1*/ -static int -mirror_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags) -{ - return (mirror_internal_close(getminor(dev), otyp, md_cflags, - (IOLOCK *)NULL)); -} - - -/* - * This routine dumps memory to the disk. It assumes that the memory has - * already been mapped into mainbus space. It is called at disk interrupt - * priority when the system is in trouble. - * - */ -static int -mirror_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) -{ - mm_unit_t *un; - dev_t mapdev; - int result; - int smi; - int any_succeed = 0; - int save_result = 0; - - /* - * Don't need to grab the unit lock. - * Cause nothing else is suppose to be happenning. - * Also dump is not suppose to sleep. - */ - un = (mm_unit_t *)MD_UNIT(getminor(dev)); - - if ((diskaddr_t)blkno >= un->c.un_total_blocks) - return (EINVAL); - - if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks) - return (EINVAL); - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SUBMIRROR_IS_WRITEABLE(un, smi)) - continue; - mapdev = md_dev64_to_dev(un->un_sm[smi].sm_dev); - result = bdev_dump(mapdev, addr, blkno, nblk); - if (result) - save_result = result; - - if (result == 0) - any_succeed++; - } - - if (any_succeed) - return (0); - - return (save_result); -} - -/* - * NAME: mirror_probe_dev - * - * DESCRITPION: force opens every component of a mirror. - * - * On entry the unit writerlock is held - */ -static int -mirror_probe_dev(mdi_unit_t *ui, minor_t mnum) -{ - int i; - int smi; - int ci; - mm_unit_t *un; - int md_devopen = 0; - set_t setno; - int sm_cnt; - int sm_unavail_cnt; - - if (md_unit_isopen(ui)) - md_devopen++; - - un = MD_UNIT(mnum); - setno = MD_UN2SET(un); - - sm_cnt = 0; - sm_unavail_cnt = 0; - for (i = 0; i < NMIRROR; i++) { - md_dev64_t tmpdev; - mdi_unit_t *sm_ui; - - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) { - continue; - } - - sm_cnt++; - tmpdev = un->un_sm[i].sm_dev; - (void) md_layered_open(mnum, &tmpdev, - MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV); - un->un_sm[i].sm_dev = tmpdev; - - sm_ui = MDI_UNIT(getminor(md_dev64_to_dev(tmpdev))); - - /* - * Logic similar to that in mirror_open_all_devs. We set or - * clear the submirror Unavailable bit. - */ - (void) md_unit_writerlock(sm_ui); - if (submirror_unavailable(un, i, 1)) { - sm_ui->ui_tstate |= MD_INACCESSIBLE; - sm_unavail_cnt++; - } else { - sm_ui->ui_tstate &= ~MD_INACCESSIBLE; - } - md_unit_writerexit(sm_ui); - } - - /* - * If all of the submirrors are unavailable, the mirror is also - * unavailable. - */ - if (sm_cnt == sm_unavail_cnt) { - ui->ui_tstate |= MD_INACCESSIBLE; - } else { - ui->ui_tstate &= ~MD_INACCESSIBLE; - } - - /* - * Start checking from probe failures. If failures occur we - * set the appropriate erred state only if the metadevice is in - * use. This is specifically to prevent unnecessary resyncs. - * For instance if the disks were accidentally disconnected when - * the system booted up then until the metadevice is accessed - * (like file system mount) the user can shutdown, recable and - * reboot w/o incurring a potentially huge resync. - */ - - smi = 0; - ci = 0; - while (mirror_geterror(un, &smi, &ci, 1, 1) != 0) { - - if (mirror_other_sources(un, smi, ci, 0) == 1) { - /* - * Note that for a MN set, there is no need to call - * SE_NOTIFY as that is done when processing the - * state change - */ - if (md_devopen) { - /* - * Never called from ioctl context, - * so (IOLOCK *)NULL - */ - set_sm_comp_state(un, smi, ci, CS_LAST_ERRED, - 0, MD_STATE_XMIT, (IOLOCK *)NULL); - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, - ESC_SVM_LASTERRED, - SVM_TAG_METADEVICE, setno, - MD_SID(un)); - } - continue; - } else { - (void) mirror_close_all_devs(un, - MD_OFLG_PROBEDEV); - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, - ESC_SVM_OPEN_FAIL, - SVM_TAG_METADEVICE, setno, - MD_SID(un)); - } - mirror_openfail_console_info(un, smi, ci); - return (ENXIO); - } - } - - /* - * Note that for a MN set, there is no need to call - * SE_NOTIFY as that is done when processing the - * state change - */ - if (md_devopen) { - /* Never called from ioctl context, so (IOLOCK *)NULL */ - set_sm_comp_state(un, smi, ci, CS_ERRED, 0, - MD_STATE_XMIT, (IOLOCK *)NULL); - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, setno, - MD_SID(un)); - } - } - mirror_openfail_console_info(un, smi, ci); - ci++; - } - - if (MD_MNSET_SETNO(setno)) { - send_poke_hotspares(setno); - } else { - (void) poke_hotspares(); - } - (void) mirror_close_all_devs(un, MD_OFLG_PROBEDEV); - - return (0); -} - - -static int -mirror_imp_set( - set_t setno -) -{ - - mddb_recid_t recid; - int gotsomething, i; - mddb_type_t typ1; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - mm_unit32_od_t *un32; - mm_unit_t *un64; - md_dev64_t self_devt; - minor_t *self_id; /* minor needs to be updated */ - md_parent_t *parent_id; /* parent needs to be updated */ - mddb_recid_t *record_id; /* record id needs to be updated */ - mddb_recid_t *optrec_id; - md_dev64_t tmpdev; - - - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, MIRROR_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * Small device - */ - un32 = (mm_unit32_od_t *)mddb_getrecaddr(recid); - self_id = &(un32->c.un_self_id); - parent_id = &(un32->c.un_parent); - record_id = &(un32->c.un_record_id); - optrec_id = &(un32->un_rr_dirty_recid); - - for (i = 0; i < un32->un_nsm; i++) { - tmpdev = md_expldev(un32->un_sm[i].sm_dev); - un32->un_sm[i].sm_dev = md_cmpldev - (md_makedevice(md_major, MD_MKMIN(setno, - MD_MIN2UNIT(md_getminor(tmpdev))))); - - if (!md_update_minor(setno, mddb_getsidenum - (setno), un32->un_sm[i].sm_key)) - goto out; - } - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - un64 = (mm_unit_t *)mddb_getrecaddr(recid); - self_id = &(un64->c.un_self_id); - parent_id = &(un64->c.un_parent); - record_id = &(un64->c.un_record_id); - optrec_id = &(un64->un_rr_dirty_recid); - - for (i = 0; i < un64->un_nsm; i++) { - tmpdev = un64->un_sm[i].sm_dev; - un64->un_sm[i].sm_dev = md_makedevice - (md_major, MD_MKMIN(setno, MD_MIN2UNIT - (md_getminor(tmpdev)))); - - if (!md_update_minor(setno, mddb_getsidenum - (setno), un64->un_sm[i].sm_key)) - goto out; - } - break; - } - - /* - * If this is a top level and a friendly name metadevice, - * update its minor in the namespace. - */ - if ((*parent_id == MD_NO_PARENT) && - ((rbp->rb_revision == MDDB_REV_RBFN) || - (rbp->rb_revision == MDDB_REV_RB64FN))) { - - self_devt = md_makedevice(md_major, *self_id); - if (!md_update_top_device_minor(setno, - mddb_getsidenum(setno), self_devt)) - goto out; - } - - /* - * Update unit with the imported setno - * - */ - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); - if (*parent_id != MD_NO_PARENT) - *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); - *record_id = MAKERECID(setno, DBID(*record_id)); - *optrec_id = MAKERECID(setno, DBID(*optrec_id)); - - gotsomething = 1; - } - -out: - return (gotsomething); -} - -/* - * NAME: mirror_check_offline - * - * DESCRIPTION: return offline_status = 1 if any submirrors are offline - * - * Called from ioctl, so access to MD_UN_OFFLINE_SM in un_status is - * protected by the global ioctl lock as it is only set by the MD_IOCOFFLINE - * ioctl. - */ -int -mirror_check_offline(md_dev64_t dev, int *offline_status) -{ - mm_unit_t *un; - md_error_t mde = mdnullerror; - - if ((un = mirror_getun(getminor(dev), &mde, NO_LOCK, NULL)) == NULL) - return (EINVAL); - *offline_status = 0; - if (un->c.un_status & MD_UN_OFFLINE_SM) - *offline_status = 1; - return (0); -} - -/* - * NAME: mirror_inc_abr_count - * - * DESCRIPTION: increment the count of layered soft parts with ABR set - * - * Called from ioctl, so access to un_abr_count is protected by the global - * ioctl lock. It is only referenced in the MD_IOCOFFLINE ioctl. - */ -int -mirror_inc_abr_count(md_dev64_t dev) -{ - mm_unit_t *un; - md_error_t mde = mdnullerror; - - if ((un = mirror_getun(getminor(dev), &mde, NO_LOCK, NULL)) == NULL) - return (EINVAL); - un->un_abr_count++; - return (0); -} - -/* - * NAME: mirror_dec_abr_count - * - * DESCRIPTION: decrement the count of layered soft parts with ABR set - * - * Called from ioctl, so access to un_abr_count is protected by the global - * ioctl lock. It is only referenced in the MD_IOCOFFLINE ioctl. - */ -int -mirror_dec_abr_count(md_dev64_t dev) -{ - mm_unit_t *un; - md_error_t mde = mdnullerror; - - if ((un = mirror_getun(getminor(dev), &mde, NO_LOCK, NULL)) == NULL) - return (EINVAL); - un->un_abr_count--; - return (0); -} - -static md_named_services_t mirror_named_services[] = { - {(intptr_t (*)()) poke_hotspares, "poke hotspares" }, - {(intptr_t (*)()) mirror_rename_listkids, MDRNM_LIST_URKIDS }, - {mirror_rename_check, MDRNM_CHECK }, - {(intptr_t (*)()) mirror_renexch_update_kids, MDRNM_UPDATE_KIDS }, - {(intptr_t (*)()) mirror_exchange_parent_update_to, - MDRNM_PARENT_UPDATE_TO}, - {(intptr_t (*)()) mirror_exchange_self_update_from_down, - MDRNM_SELF_UPDATE_FROM_DOWN }, - {(intptr_t (*)())mirror_probe_dev, "probe open test" }, - {(intptr_t (*)())mirror_check_offline, MD_CHECK_OFFLINE }, - {(intptr_t (*)())mirror_inc_abr_count, MD_INC_ABR_COUNT }, - {(intptr_t (*)())mirror_dec_abr_count, MD_DEC_ABR_COUNT }, - { NULL, 0 } -}; - -md_ops_t mirror_md_ops = { - mirror_open, /* open */ - mirror_close, /* close */ - md_mirror_strategy, /* strategy */ - NULL, /* print */ - mirror_dump, /* dump */ - NULL, /* read */ - NULL, /* write */ - md_mirror_ioctl, /* mirror_ioctl, */ - mirror_snarf, /* mirror_snarf */ - mirror_halt, /* mirror_halt */ - NULL, /* aread */ - NULL, /* awrite */ - mirror_imp_set, /* import set */ - mirror_named_services -}; - -/* module specific initilization */ -static void -init_init() -{ - md_mirror_mcs_buf_off = sizeof (md_mcs_t) - sizeof (buf_t); - - /* Initialize the parent and child save memory pools */ - mirror_parent_cache = kmem_cache_create("md_mirror_parent", - sizeof (md_mps_t), 0, mirror_parent_constructor, - mirror_parent_destructor, mirror_run_queue, NULL, NULL, - 0); - - mirror_child_cache = kmem_cache_create("md_mirror_child", - sizeof (md_mcs_t) - sizeof (buf_t) + biosize(), 0, - mirror_child_constructor, mirror_child_destructor, - mirror_run_queue, NULL, NULL, 0); - - /* - * Insure wowbuf_size is a multiple of DEV_BSIZE, - * then initialize wowbuf memory pool. - */ - md_wowbuf_size = roundup(md_wowbuf_size, DEV_BSIZE); - if (md_wowbuf_size <= 0) - md_wowbuf_size = 2 * DEV_BSIZE; - if (md_wowbuf_size > (32 * DEV_BSIZE)) - md_wowbuf_size = (32 * DEV_BSIZE); - - md_wowblk_size = md_wowbuf_size + sizeof (wowhdr_t); - mirror_wowblk_cache = kmem_cache_create("md_mirror_wow", - md_wowblk_size, 0, NULL, NULL, NULL, NULL, NULL, 0); - - mutex_init(&mirror_timeout.dr_mx, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&hotspare_request.dr_mx, NULL, MUTEX_DEFAULT, NULL); - - mutex_init(&non_ff_drv_mutex, NULL, MUTEX_DEFAULT, NULL); -} - -/* module specific uninitilization (undo init_init()) */ -static void -fini_uninit() -{ - kmem_cache_destroy(mirror_parent_cache); - kmem_cache_destroy(mirror_child_cache); - kmem_cache_destroy(mirror_wowblk_cache); - mirror_parent_cache = mirror_child_cache = - mirror_wowblk_cache = NULL; - - mutex_destroy(&mirror_timeout.dr_mx); - mutex_destroy(&hotspare_request.dr_mx); - mutex_destroy(&non_ff_drv_mutex); -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("mirrors module", init_init(), fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c b/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c deleted file mode 100644 index 5b2592fd0d9f..000000000000 --- a/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c +++ /dev/null @@ -1,3932 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright 2012 Milan Jurik. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -extern int md_status; -extern kmutex_t md_mx; -extern kcondvar_t md_cv; - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern md_ops_t mirror_md_ops; -extern int md_ioctl_cnt; -extern md_krwlock_t md_unit_array_rw; -extern major_t md_major; -extern mdq_anchor_t md_ff_daemonq; -extern void md_probe_one(probe_req_t *); -extern void mirror_openfail_console_info(mm_unit_t *, int, int); - -#ifdef DEBUG -extern int mirror_debug_flag; -#endif - -static void -mirror_resume_writes(mm_unit_t *un) -{ - /* - * Release the block on writes to the mirror and resume any blocked - * resync thread. - * This is only required for MN sets - */ - if (MD_MNSET_SETNO(MD_UN2SET(un))) { -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_resume_writes: mnum %x\n", MD_SID(un)); -#endif - mutex_enter(&un->un_suspend_wr_mx); - un->un_suspend_wr_flag = 0; - cv_broadcast(&un->un_suspend_wr_cv); - mutex_exit(&un->un_suspend_wr_mx); - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~MD_RI_BLOCK; - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - } -} - -mm_unit_t * -mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock) -{ - mm_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { - (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); - return (NULL); - } - - if (!(flags & STALE_OK)) { - if (md_get_setstatus(setno) & MD_SET_STALE) { - (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); - return (NULL); - } - } - - ui = MDI_UNIT(mnum); - if (flags & NO_OLD) { - if (ui != NULL) { - (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum); - return (NULL); - } - return ((mm_unit_t *)1); - } - - if (ui == NULL) { - (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); - return (NULL); - } - - if (flags & ARRAY_WRITER) - md_array_writer(lock); - else if (flags & ARRAY_READER) - md_array_reader(lock); - - if (!(flags & NO_LOCK)) { - if (flags & WR_LOCK) - (void) md_ioctl_writerlock(lock, ui); - else /* RD_LOCK */ - (void) md_ioctl_readerlock(lock, ui); - } - un = (mm_unit_t *)MD_UNIT(mnum); - - if (un->c.un_type != MD_METAMIRROR) { - (void) mdmderror(mde, MDE_NOT_MM, mnum); - return (NULL); - } - - return (un); -} - -static int -mirror_set( - void *d, - int mode -) -{ - minor_t mnum; - mm_unit_t *un; - mddb_recid_t recid; - mddb_type_t typ1; - int err; - int i; - set_t setno; - md_set_params_t *msp = d; - - - mnum = msp->mnum; - - mdclrerror(&msp->mde); - - if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL) - return (0); - - setno = MD_MIN2SET(mnum); - - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - - /* - * Create the db record for this mdstruct - * We don't store incore elements ondisk - */ - - if (msp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum)); -#else - recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC, - MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno); -#endif - } else { - /* - * It's important to use the correct size here - */ - msp->size = sizeof (mm_unit32_od_t); - recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC, - MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno); - } - if (recid < 0) - return (mddbstatus2error(&msp->mde, (int)recid, - mnum, setno)); - - /* Resize to include incore fields */ - un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0); - /* - * It is okay that we muck with the mdstruct here, - * since no one else will know about the mdstruct - * until we commit it. If we crash, the record will - * be automatically purged, since we haven't - * committed it yet. - */ - - /* copy in the user's mdstruct */ - if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un, - (uint_t)msp->size, mode)) { - mddb_deleterec_wrapper(recid); - return (EFAULT); - } - /* All 64 bit metadevices only support EFI labels. */ - if (msp->options & MD_CRO_64BIT) { - un->c.un_flag |= MD_EFILABEL; - } - - un->c.un_revision |= MD_FN_META_DEV; - MD_RECID(un) = recid; - MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP; - MD_PARENT(un) = MD_NO_PARENT; - - for (i = 0; i < NMIRROR; i++) { - struct mm_submirror *sm; - - sm = &un->un_sm[i]; - if (!SMS_IS(sm, SMS_INUSE)) - continue; - - /* ensure that the submirror is a metadevice */ - if (md_getmajor(sm->sm_dev) != md_major) - return (mdmderror(&msp->mde, MDE_INVAL_UNIT, - md_getminor(sm->sm_dev))); - - if (md_get_parent(sm->sm_dev) == MD_NO_PARENT) - continue; - - /* mirror creation should fail here */ - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(recid); - return (mdmderror(&msp->mde, MDE_IN_USE, - md_getminor(sm->sm_dev))); - } - - if (err = mirror_build_incore(un, 0)) { - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(recid); - return (err); - } - - /* - * Update unit availability - */ - md_set[setno].s_un_avail--; - - mirror_commit(un, ALL_SUBMIRRORS, 0); - md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0); - mirror_check_failfast(mnum); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno, - MD_SID(un)); - - resync_start_timeout(setno); - return (0); -} - -static int -mirror_get( - void *migp, - int mode, - IOLOCK *lock -) -{ - mm_unit_t *un; - md_i_get_t *migph = migp; - - mdclrerror(&migph->mde); - - if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL) - return (0); - - if (migph->size == 0) { - migph->size = un->c.un_size; - return (0); - } - - if (migph->size < un->c.un_size) { - return (EFAULT); - } - if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp, - un->c.un_size, mode)) - return (EFAULT); - return (0); -} - -static int -mirror_getdevs( - void *mgdp, - int mode, - IOLOCK *lock -) -{ - mm_unit_t *un; - md_dev64_t *udevs; - int cnt; - int i; - md_dev64_t unit_dev; - md_getdevs_params_t *mgdph = mgdp; - - - mdclrerror(&mgdph->mde); - - if ((un = mirror_getun(mgdph->mnum, - &mgdph->mde, RD_LOCK, lock)) == NULL) - return (0); - - udevs = (md_dev64_t *)(uintptr_t)mgdph->devs; - - for (cnt = 0, i = 0; i < NMIRROR; i++) { - if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) - continue; - if (cnt < mgdph->cnt) { - unit_dev = un->un_sm[i].sm_dev; - if (md_getmajor(unit_dev) != md_major) { - unit_dev = md_xlate_mini_2_targ(unit_dev); - if (unit_dev == NODEV64) - return (ENODEV); - } - - if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs, - sizeof (*udevs), mode) != 0) - return (EFAULT); - ++udevs; - } - ++cnt; - } - - mgdph->cnt = cnt; - return (0); -} - -static int -mirror_reset( - md_i_reset_t *mirp -) -{ - minor_t mnum = mirp->mnum; - mm_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&mirp->mde); - - if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL) - return (0); - - if (MD_HAS_PARENT(un->c.un_parent)) { - return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); - } - - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - - /* single thread */ - ui = MDI_UNIT(mnum); - (void) md_unit_openclose_enter(ui); - - if (md_unit_isopen(ui)) { - md_unit_openclose_exit(ui); - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); - } - - md_unit_openclose_exit(ui); - - if (!mirp->force) { - int smi; - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - - if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, - MDE_C_WITH_INVAL_SM, mnum)); - } - } - } - - reset_mirror(un, mnum, 1); - - /* - * Update unit availability - */ - md_set[setno].s_un_avail++; - - /* - * If MN set, reset s_un_next so all nodes can have - * the same view of the next available slot when - * nodes are -w and -j - */ - if (MD_MNSET_SETNO(setno)) { - (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); - } - - rw_exit(&md_unit_array_rw.lock); - return (0); -} - -static int -mirror_get_geom( - mm_unit_t *un, - struct dk_geom *geomp -) -{ - md_get_geom((md_unit_t *)un, geomp); - - return (0); -} - -static int -mirror_get_vtoc( - mm_unit_t *un, - struct vtoc *vtocp -) -{ - md_get_vtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -mirror_set_vtoc( - mm_unit_t *un, - struct vtoc *vtocp -) -{ - return (md_set_vtoc((md_unit_t *)un, vtocp)); -} - -static int -mirror_get_extvtoc( - mm_unit_t *un, - struct extvtoc *vtocp -) -{ - md_get_extvtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -mirror_set_extvtoc( - mm_unit_t *un, - struct extvtoc *vtocp -) -{ - return (md_set_extvtoc((md_unit_t *)un, vtocp)); -} - -static int -mirror_get_cgapart( - mm_unit_t *un, - struct dk_map *dkmapp -) -{ - md_get_cgapart((md_unit_t *)un, dkmapp); - return (0); -} - -static int -mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params, - int *smi, int *cip) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - ms_comp_t *comp; - ms_unit_t *mous; - int ci; - int i; - int compcnt; - ms_cd_info_t cd; - void (*get_dev)(); - md_dev64_t dev = md_expldev(params->old_dev); - md_error_t *ep = ¶ms->mde; - minor_t mnum = params->mnum; - mdkey_t devkey; - int nkeys; - set_t setno; - side_t side; - - setno = MD_MIN2SET(MD_SID(un)); - side = mddb_getsidenum(setno); - - if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0) - return (mddeverror(ep, MDE_NAME_SPACE, dev)); - - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - if (!SMS_IS(sm, SMS_INUSE)) - continue; - - get_dev = - (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un); - - /* - * For each of the underlying stripe components get - * the info. - */ - for (ci = 0; ci < compcnt; ci++) { - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) { - *cip = ci; - *smi = i; - return (1); - } - } - - /* - * now we rescan looking only for NODEV. If we find - * NODEV then we will check the keys to see if its a match. - * - * If no key was found to match dev, then there is - * no way to compare keys - so continue. - */ - if (nkeys == 0) { - continue; - } - mous = MD_UNIT(md_getminor(sm->sm_dev)); - - for (ci = 0; ci < compcnt; ci++) { - - comp = (struct ms_comp *) - ((void *)&((char *)mous)[mous->un_ocomp]); - - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - - if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) { - comp += ci; - if (comp->un_key == devkey) { - if (nkeys > 1) { - return (mddeverror( - ep, MDE_MULTNM, dev)); - } - *cip = ci; - *smi = i; - return (1); - } - } - } - } - return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev)); -} - -/* - * comp_replace: - * ---------------- - * Called to implement the component replace function - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * error code if the functions fails - * - * For a MN set, on entry all writes to the mirror are suspended, on exit - * from this function, writes must be resumed when not a dryrun. - */ -static int -comp_replace( - replace_params_t *params, - IOLOCK *lock -) -{ - minor_t mnum = params->mnum; - set_t setno; - side_t side; - mm_unit_t *un; - mdi_unit_t *ui; - ms_unit_t *ms_un; - mdi_unit_t *ms_ui; - ms_comp_t *comp; - mm_submirror_t *sm; - md_dev64_t smdev; - mddb_recid_t recids[6]; /* recids for stripe on SP */ - int smi, ci; - ms_new_dev_t nd; - int (*repl_dev)(); - void (*repl_done)(); - void *repl_data; - int err = 0; - ms_cd_info_t cd; - void (*get_dev)(); - - mdclrerror(¶ms->mde); - - if ((un = mirror_getun(mnum, ¶ms->mde, WRITERS, lock)) == NULL) { - return (0); - } - - ui = MDI_UNIT(mnum); - if (ui->ui_tstate & MD_INACCESSIBLE) { - (void) mdmderror(¶ms->mde, MDE_IN_UNAVAIL_STATE, mnum); - goto errexit; - } - - /* - * replace cannot be done while a resync is active or we are - * still waiting for an optimized resync to be started - */ - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { - (void) mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum); - goto errexit; - } - - if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) { - goto errexit; - } - - if (un->un_nsm == 1) { - (void) mdmderror(¶ms->mde, MDE_LAST_SM_RE, mnum); - goto errexit; - } - - if (mirror_other_sources(un, smi, ci, 0) != 0) { - (void) mdcomperror(¶ms->mde, MDE_REPL_INVAL_STATE, - mnum, md_expldev(params->old_dev)); - goto errexit; - } - - sm = &un->un_sm[smi]; - if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) { - (void) mdmderror(¶ms->mde, MDE_ILLEGAL_SM_STATE, mnum); - goto errexit; - } - - get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); - - repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0, - "replace device", 0); - - smdev = sm->sm_dev; - ms_un = MD_UNIT(md_getminor(smdev)); - - if (params->cmd == ENABLE_COMP) { - md_dev64_t this_dev; - int numkeys; - mdkey_t this_key; - - this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev : - cd.cd_orig_dev); - setno = MD_MIN2SET(md_getminor(smdev)); - side = mddb_getsidenum(setno); - comp = (struct ms_comp *) - ((void *)&((char *)ms_un)[ms_un->un_ocomp]); - comp += ci; - /* - * We trust the dev_t because we cannot determine the - * dev_t from the device id since a new disk is in the - * same location. Since this is a call from metareplace -e dx - * AND it is SCSI a new dev_t is not generated. So the - * dev_t from the mddb is used. Before enabling the device - * we check to make sure that multiple entries for the same - * device does not exist in the namespace. If they do we - * fail the ioctl. - * One of the many ways multiple entries in the name space - * can occur is if one removed the failed component in the - * stripe of a mirror and put another disk that was part of - * another metadevice. After reboot metadevadm would correctly - * update the device name for the metadevice whose component - * has moved. However now in the metadb there are two entries - * for the same name (ctds) that belong to different - * metadevices. One is valid, the other is a ghost or "last - * know as" ctds. - */ - this_dev = md_getdevnum(setno, side, - comp->un_key, MD_TRUST_DEVT); - - /* - * Verify that multiple keys for the same - * dev_t don't exist - */ - - if (md_getkeyfromdev(setno, side, this_dev, - &this_key, &numkeys) != 0) { - (void) mddeverror(¶ms->mde, MDE_NAME_SPACE, - md_expldev(params->old_dev)); - goto errexit; - } - /* - * Namespace has multiple entries - * for the same devt - */ - if (numkeys > 1) { - (void) mddeverror(¶ms->mde, MDE_MULTNM, - md_expldev(params->old_dev)); - goto errexit; - } - if ((numkeys == 0) || (comp->un_key != this_key)) { - (void) mdcomperror(¶ms->mde, MDE_CANT_FIND_COMP, - mnum, this_dev); - goto errexit; - } - - if ((md_getmajor(this_dev) != md_major) && - (md_devid_found(setno, side, this_key) == 1)) { - if (md_update_namespace_did(setno, side, - this_key, ¶ms->mde) != 0) { - (void) mddeverror(¶ms->mde, MDE_NAME_SPACE, - this_dev); - goto errexit; - } - } - - if (md_expldev(params->new_dev) != this_dev) { - (void) mddeverror(¶ms->mde, MDE_FIX_INVAL_STATE, - md_expldev(params->new_dev)); - goto errexit; - } - - /* in case of dryrun, don't actually do anything */ - if ((params->options & MDIOCTL_DRYRUN) == 0) { - err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6, - &repl_done, &repl_data); - } - } else if ((params->options & MDIOCTL_DRYRUN) == 0) { - nd.nd_dev = md_expldev(params->new_dev); - nd.nd_key = params->new_key; - nd.nd_start_blk = params->start_blk; - nd.nd_nblks = params->number_blks; - nd.nd_labeled = params->has_label; - nd.nd_hs_id = 0; - - err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6, - &repl_done, &repl_data); - - } - - if (err != 0) { - (void) mdcomperror(¶ms->mde, err, mnum, - md_expldev(params->new_dev)); - goto errexit; - } - /* In case of a dryun we're done. */ - if (params->options & MDIOCTL_DRYRUN) { - mdclrerror(¶ms->mde); - return (0); - } - - /* set_sm_comp_state() commits the modified records */ - set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT, - lock); - - (*repl_done)(sm->sm_dev, repl_data); - - /* - * If the mirror is open then need to make sure that the submirror, - * on which the replace ran, is also open and if not then open it. - * This is only a concern for a single component sub-mirror stripe - * as it may not be open due to the failure of the single component. - * - * This check has to be done after the call to (*repl_done) - * as that function releases the writer lock on the submirror. - */ - if (md_unit_isopen(ui)) { - minor_t ms_mnum = md_getminor(sm->sm_dev); - - ms_ui = MDI_UNIT(ms_mnum); - - if (!md_unit_isopen(ms_ui)) { - /* - * Underlying submirror is not open so open it. - */ - if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) { - mirror_openfail_console_info(un, smi, ci); - goto errexit; - } - } - } - - mirror_check_failfast(mnum); - - if (params->cmd == ENABLE_COMP) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - md_ioctl_writerexit(lock); - /* - * Reset any saved resync location flags as we've now replaced the - * component. This means we have to resync the _whole_ component. - */ - un->un_rs_resync_done = un->un_rs_resync_2_do = 0; - un->un_rs_type = MD_RS_NONE; - mirror_resume_writes(un); - if (!MD_MNSET_SETNO(MD_UN2SET(un))) - (void) mirror_resync_unit(mnum, NULL, ¶ms->mde, lock); - mdclrerror(¶ms->mde); - return (0); -errexit: - /* We need to resume writes unless this is a dryrun */ - if (!(params->options & MDIOCTL_DRYRUN)) - mirror_resume_writes(un); - return (0); -} - -/* - * mirror_attach: - * ---------------- - * Called to implement the submirror attach function - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * error code if the functions fails - * - * For a MN set, on entry all writes to the mirror are suspended, on exit - * from this function, writes must be resumed when not a dryrun. - */ -static int -mirror_attach( - md_att_struct_t *att, - IOLOCK *lock -) -{ - minor_t mnum = att->mnum; - mm_unit_t *un; - md_unit_t *su; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int smi; - md_dev64_t sm_dev; - minor_t sm_mnum; - mdkey_t indx; - set_t setno; - uint_t options; - - /* - * This routine should not be called during upgrade. - */ - if (MD_UPGRADE) { - return (0); - } - - mdclrerror(&att->mde); - options = att->options; - - if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) { - return (0); - } - - setno = MD_UN2SET(un); - - for (smi = 0; smi < NMIRROR; smi++) - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - break; - - if (smi == NMIRROR) { - (void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum); - goto errexit; - } - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - sm_dev = att->submirror; - sm_mnum = md_getminor(sm_dev); - - if (md_get_parent(sm_dev) != MD_NO_PARENT) { - (void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum); - goto errexit; - } - - if (md_unit_isopen(MDI_UNIT(sm_mnum))) { - (void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum); - goto errexit; - } - - /* Check the size */ - su = (md_unit_t *)MD_UNIT(sm_mnum); - if (un->c.un_total_blocks > su->c.un_total_blocks) { - (void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum); - goto errexit; - } - - /* Don't attach labeled sm to unlabeled mirrors */ - if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) { - (void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum); - goto errexit; - } - - indx = md_setshared_name(setno, - ddi_major_to_name(md_getmajor(sm_dev)), 0L); - - /* Open the sm, only if the mirror is open */ - if (md_unit_isopen(MDI_UNIT(mnum))) { - if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) { - (void) md_remshared_name(setno, indx); - (void) mdmderror(&att->mde, MDE_SM_OPEN_ERR, - md_getminor(att->submirror)); - goto errexit; - } - /* in dryrun mode, don't leave the device open */ - if (options & MDIOCTL_DRYRUN) { - md_layered_close(sm_dev, MD_OFLG_NULL); - } - } - - /* - * After this point the checks are done and action is taken. - * So, clean up and return in case of dryrun. - */ - - if (options & MDIOCTL_DRYRUN) { - md_ioctl_writerexit(lock); - mdclrerror(&att->mde); - return (0); - } - - sm->sm_key = att->key; - sm->sm_dev = sm_dev; - md_set_parent(sm_dev, MD_SID(un)); - mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1); - build_submirror(un, smi, 0); - un->un_nsm++; - mirror_commit(un, SMI2BIT(smi), 0); - mirror_check_failfast(mnum); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - mirror_resume_writes(un); - md_ioctl_writerexit(lock); - if (!MD_MNSET_SETNO(setno)) - (void) mirror_resync_unit(mnum, NULL, &att->mde, lock); - mdclrerror(&att->mde); - return (0); -errexit: - /* We need to resume writes unless this is a dryrun */ - if (!(options & MDIOCTL_DRYRUN)) - mirror_resume_writes(un); - return (0); -} - - -void -reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic) -{ - int compcnt; - int i; - md_m_shared_t *shared; - - compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm); - for (i = 0; i < compcnt; i++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, i); - - shared->ms_state = CS_OKAY; - shared->ms_flags &= ~MDM_S_NOWRITE; - shared->ms_lasterrcnt = 0; - } -} - - -/* - * mirror_detach: - * ---------------- - * Called to implement the submirror detach function - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * error code if the functions fails - * - * For a MN set, on entry all writes to the mirror are suspended, on exit - * from this function, writes must be resumed. - */ -static int -mirror_detach( - md_detach_params_t *det, - IOLOCK *lock -) -{ - minor_t mnum = det->mnum; - mm_unit_t *un; - mdi_unit_t *ui; - mm_submirror_t *sm; - mm_submirror_t *old_sm; - mm_submirror_t *new_sm; - mm_submirror_ic_t *smic; - int smi; - md_dev64_t sm_dev; - md_unit_t *su; - sv_dev_t sv; - mddb_recid_t recids[2]; - int nsv = 0; - int smi_remove; - mm_submirror_ic_t *old_smic; - mm_submirror_ic_t *new_smic; - - mdclrerror(&det->mde); - - if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) { - return (0); - } - - ui = MDI_UNIT(mnum); - if (ui->ui_tstate & MD_INACCESSIBLE) { - mirror_resume_writes(un); - return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum)); - } - /* - * detach cannot be done while a resync is active or we are - * still waiting for an optimized resync to be started - */ - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { - mirror_resume_writes(un); - return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - continue; - } - if (un->un_sm[smi].sm_dev == det->submirror) { - smi_remove = smi; - break; - } - } - - if (smi == NMIRROR) { - mirror_resume_writes(un); - return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum)); - } - - if (un->un_nsm == 1) { - mirror_resume_writes(un); - return (mdmderror(&det->mde, MDE_LAST_SM, mnum)); - } - - if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) { - mirror_resume_writes(un); - return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum)); - } - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - sm_dev = sm->sm_dev; - su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev)); - - /* - * Need to pass in the extra record id, - * cause mirror_commit() will not commit - * a sm (from the smmask) if the slot is unused. - * Which it is, since we are detaching. - */ - recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id; - recids[1] = 0; - - mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach); - /* - * If there are any erred components - * then make the detach fail and do not unparent the - * submirror. - */ - if (sm->sm_state == SMS_UNUSED) { - /* reallow soft partitioning of submirror */ - MD_CAPAB(su) |= MD_CAN_SP; - md_reset_parent(sm_dev); - reset_comp_states(sm, smic); - un->un_nsm--; - /* Close the sm, only if the mirror is open */ - if (md_unit_isopen(MDI_UNIT(mnum))) - md_layered_close(sm_dev, MD_OFLG_NULL); - sv.setno = MD_UN2SET(un); - sv.key = sm->sm_key; - nsv = 1; - } else - (void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum); - - /* - * Perhaps the mirror changed it's size due to this detach. - * (void) mirror_grow_unit(un, &mde); - */ - - /* - * NOTE: We are passing the detached sm recid - * and not the smmask field. This is correct. - */ - mirror_commit(un, 0, recids); - md_rem_names(&sv, nsv); - if (sm->sm_state == SMS_UNUSED) { - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - /* - * Reshuffle the submirror devices in the array as we potentially - * have a dead record in the middle of it. - */ - for (smi = 0; nsv && (smi < NMIRROR); smi++) { - if (smi < smi_remove) { - continue; - } - if (smi > smi_remove) { - old_sm = &un->un_sm[smi]; - new_sm = &un->un_sm[smi - 1]; - new_sm->sm_key = old_sm->sm_key; - new_sm->sm_dev = old_sm->sm_dev; - new_sm->sm_state = old_sm->sm_state; - new_sm->sm_flags = old_sm->sm_flags; - new_sm->sm_shared = old_sm->sm_shared; - new_sm->sm_hsp_id = old_sm->sm_hsp_id; - new_sm->sm_timestamp = old_sm->sm_timestamp; - bzero(old_sm, sizeof (mm_submirror_t)); - old_smic = &un->un_smic[smi]; - new_smic = &un->un_smic[smi - 1]; - bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t)); - bzero(old_smic, sizeof (mm_submirror_ic_t)); - } - } - mirror_commit(un, 0, NULL); - mirror_resume_writes(un); - return (0); -} - -/* - * mirror_offline: - * ---------------- - * Called to implement the submirror offline function - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * error code if the functions fails - * - * For a MN set, on entry all writes to the mirror are suspended, on exit - * from this function, writes must be resumed. - */ -static int -mirror_offline( - md_i_off_on_t *miop, - IOLOCK *lock -) -{ - minor_t mnum = miop->mnum; - mm_unit_t *un; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int smi; - mdi_unit_t *ui = MDI_UNIT(mnum); - - mdclrerror(&miop->mde); - - if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) { - return (0); - } - - /* - * offline cannot be done while a resync is active or we are - * still waiting for an optimized resync to be started - */ - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - /* - * Reject mirror_offline if ABR is set - */ - if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) { - mirror_resume_writes(un); - return (mderror(&miop->mde, MDE_ABR_SET)); - } - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - if (un->un_sm[smi].sm_dev == miop->submirror) - break; - } - - if (smi == NMIRROR) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum)); - } - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum)); - } - - if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum)); - } - mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1); - mirror_resume_writes(un); - - MD_STATUS(un) |= MD_UN_OFFLINE_SM; - mirror_commit(un, NO_SUBMIRRORS, 0); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (0); -} - -/* - * mirror_online: - * ---------------- - * Called to implement the submirror online function - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * error code if the functions fails - * - * For a MN set, on entry all writes to the mirror are suspended, on exit - * from this function, writes must be resumed. - */ -static int -mirror_online( - md_i_off_on_t *miop, - IOLOCK *lock -) -{ - minor_t mnum = miop->mnum; - mm_unit_t *un; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int smi; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&miop->mde); - - if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) { - return (0); - } - - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - if (un->un_sm[smi].sm_dev == miop->submirror) - break; - } - if (smi == NMIRROR) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum)); - } - - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (!SMS_IS(sm, SMS_OFFLINE)) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum)); - } - - /* - * online cannot be done while a resync is active or we are - * still waiting for an optimized resync to be started - */ - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { - mirror_resume_writes(un); - return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1); - mirror_commit(un, NO_SUBMIRRORS, 0); - mirror_check_failfast(mnum); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - - /* for MN sets, re-read the resync record from disk */ - if (MD_MNSET_SETNO(MD_UN2SET(un))) - (void) mddb_reread_rr(setno, un->un_rr_dirty_recid); - - bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm, - howmany(un->un_rrd_num, NBBY)); - MD_STATUS(un) |= MD_UN_OPT_NOT_DONE; - sm->sm_flags |= MD_SM_RESYNC_TARGET; - mirror_resume_writes(un); - md_ioctl_writerexit(lock); - if (!MD_MNSET_SETNO(setno)) - return (mirror_resync_unit(mnum, NULL, &miop->mde, lock)); - else return (0); -} - -int -mirror_grow_unit( - mm_unit_t *un, - md_error_t *ep -) -{ - md_unit_t *su; - mm_submirror_t *sm; - int smi; - diskaddr_t total_blocks; - diskaddr_t current_tb; - int spc; /* sectors per head */ - minor_t mnum = MD_SID(un); - - /* - * grow_unit cannot be done while a resync is active or we are - * still waiting for an optimized resync to be started. Set - * flag to indicate GROW_PENDING and once the resync is complete - * the grow_unit function will be executed. - */ - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { - MD_STATUS(un) |= MD_UN_GROW_PENDING; - mirror_commit(un, NO_SUBMIRRORS, 0); - return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un))); - } - - /* - * Find the smallest submirror - */ - total_blocks = 0; - for (smi = 0; smi < NMIRROR; smi++) { - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) - continue; - sm = &un->un_sm[smi]; - /* - * Growth is not possible if there is one or more - * submirrors made up of non-Metadevices. - */ - if (md_getmajor(sm->sm_dev) != md_major) - return (0); - - su = MD_UNIT(md_getminor(sm->sm_dev)); - if ((total_blocks == 0) || - (su->c.un_total_blocks < total_blocks)) - total_blocks = su->c.un_total_blocks; - } - - /* - * If the smallest submirror is not larger - * than the mirror, we are all done. - */ - if (total_blocks <= un->c.un_total_blocks) - return (0); - - /* - * Growing the mirror now. - * First: Round down the actual_tb to be a multiple - * of nheads * nsects. - */ - spc = un->c.un_nhead * un->c.un_nsect; - current_tb = (total_blocks/spc) * spc; - - un->c.un_total_blocks = current_tb; - md_nblocks_set(mnum, un->c.un_total_blocks); - un->c.un_actual_tb = total_blocks; - - /* Is the mirror growing from 32 bit device to 64 bit device? */ - if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) && - (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) { -#if defined(_ILP32) - return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum)); -#else - mddb_type_t typ1; - mddb_recid_t recid; - set_t setno; - mddb_recid_t old_recid = un->c.un_record_id; - mddb_recid_t old_vtoc; - mddb_de_ic_t *dep, *old_dep; - md_create_rec_option_t options; - - /* yup, new device size. So we need to replace the record */ - typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un), - mirror_md_ops.md_driver.md_drivername); - setno = MD_MIN2SET(mnum); - - /* Preserve the friendly name properties of growing unit */ - options = MD_CRO_64BIT | MD_CRO_MIRROR; - if (un->c.un_revision & MD_FN_META_DEV) - options |= MD_CRO_FN; - recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1, - MIRROR_REC, options, setno); - /* Resize to include incore fields */ - un->c.un_revision |= MD_64BIT_META_DEV; - /* All 64 bit metadevices only support EFI labels. */ - un->c.un_flag |= MD_EFILABEL; - /* - * If the device had a vtoc record attached to it, we remove - * the vtoc record, because the layout has changed completely. - */ - old_vtoc = un->c.un_vtoc_id; - if (old_vtoc != 0) { - un->c.un_vtoc_id = - md_vtoc_to_efi_record(old_vtoc, setno); - } - MD_RECID(un) = recid; - dep = mddb_getrecdep(recid); - old_dep = mddb_getrecdep(old_recid); - kmem_free(dep->de_rb_userdata, dep->de_reqsize); - dep->de_rb_userdata = old_dep->de_rb_userdata; - dep->de_reqsize = old_dep->de_reqsize; - dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic; - dep->de_icreqsize = old_dep->de_icreqsize; - mirror_commit(un, NO_SUBMIRRORS, 0); - old_dep->de_rb_userdata = NULL; - old_dep->de_rb_userdata_ic = NULL; - mddb_deleterec_wrapper(old_recid); - /* - * If there was a vtoc record, it is no longer needed, because - * a new efi record has been created for this un. - */ - if (old_vtoc != 0) { - mddb_deleterec_wrapper(old_vtoc); - } -#endif - } - - if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) { - if (mirror_resize_resync_regions(un, current_tb)) { - return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un))); - } - mirror_check_failfast(mnum); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (0); - } - - if (mirror_add_resync_regions(un, current_tb)) { - return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un))); - } - - mirror_check_failfast(mnum); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - return (0); -} - -static int -mirror_grow( - void *mgp, - IOLOCK *lock -) -{ - mm_unit_t *un; - md_grow_params_t *mgph = mgp; - - mdclrerror(&mgph->mde); - - if ((un = mirror_getun(mgph->mnum, - &mgph->mde, WR_LOCK, lock)) == NULL) - return (0); - - if (MD_STATUS(un) & MD_UN_GROW_PENDING) - return (0); - - return (mirror_grow_unit(un, &mgph->mde)); -} - -static int -mirror_change( - md_mirror_params_t *mmp, - IOLOCK *lock -) -{ - mm_params_t *pp = &mmp->params; - mm_unit_t *un; - - mdclrerror(&mmp->mde); - - if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL) - return (0); - - if (pp->change_read_option) - un->un_read_option = pp->read_option; - - if (pp->change_write_option) - un->un_write_option = pp->write_option; - - if (pp->change_pass_num) - un->un_pass_num = pp->pass_num; - - mirror_commit(un, NO_SUBMIRRORS, 0); - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (0); -} - -static int -mirror_get_resync( - md_resync_ioctl_t *ri -) -{ - minor_t mnum = ri->ri_mnum; - mm_unit_t *un; - u_longlong_t percent; - uint_t cnt; - uint_t rr; - diskaddr_t d; - - mdclrerror(&ri->mde); - - if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL) - return (0); - - ri->ri_flags = 0; - if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) { - ri->ri_percent_done = 0; - ri->ri_percent_dirty = 0; - return (0); - } - - if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) { - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) - ri->ri_flags |= MD_RI_INPROGRESS; - /* Return state of resync thread */ - ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK); - d = un->un_rs_resync_2_do; - if (d) { - percent = un->un_rs_resync_done; - if (un->c.un_total_blocks > - MD_MAX_BLKS_FOR_SMALL_DEVS) { - percent *= 1000; - percent /= d; - if (percent > 1000) - percent = 1000; - } else { - percent *= 100; - percent /= d; - } - ri->ri_percent_done = (int)percent; - } else { - ri->ri_percent_done = 0; - } - } - if (un->un_nsm < 2) { - ri->ri_percent_dirty = 0; - return (0); - } - cnt = 0; - for (rr = 0; rr < un->un_rrd_num; rr++) - if (IS_REGION_DIRTY(rr, un)) - cnt++; - d = un->un_rrd_num; - if (d) { - percent = cnt; - percent *= 100; - percent += d - 1; /* round up */ - percent /= d; - } else - percent = 0; - ri->ri_percent_dirty = (int)percent; - return (0); -} - -/* - * mirror_get_owner: - * ---------------- - * Called to obtain the current owner of a mirror. - * - * Owner is returned in the parameter block passed in by the caller. - * - * Returns: - * 0 success - * EINVAL metadevice does not exist or is not a member of a multi-owned - * set. - */ -static int -mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock) -{ - mm_unit_t *un; - set_t setno; - - if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL) - return (EINVAL); - - setno = MD_UN2SET(un); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - p->d.owner = un->un_mirror_owner; - return (0); -} - -/* - * mirror_choose_owner_thread: - * -------------------------- - * Called to send a CHOOSE_OWNER message to the commd running on the master - * node. This needs to run in a separate context so that mutex livelock is - * avoided. This can occur because the original request is issued from a call - * to metaioctl() which acquires the global ioctl lock, calls down into the - * mirror_ioctl code and then attempts to mdmn_ksend_message() to the master - * node. As the handler for the choose_owner message needs to send another - * ioctl through the metaioctl() entry point, any other use (by rpc.metad or - * mdcommd checking on set ownership) will deadlock the system leading to - * cluster reconfiguration timeouts and eventually a node or (at worst) a - * cluster-wide panic - */ -static void -mirror_choose_owner_thread(md_mn_msg_chooseid_t *msg) -{ - int rval; - md_mn_kresult_t *kres; - set_t setno = MD_MIN2SET(msg->msg_chooseid_mnum); - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER, - MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg, - sizeof (md_mn_msg_chooseid_t), kres); - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER"); - cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER"); - } - - kmem_free(kres, sizeof (md_mn_kresult_t)); - kmem_free(msg, sizeof (md_mn_msg_chooseid_t)); - thread_exit(); -} - -/* - * mirror_owner_thread: - * ------------------- - * Called to request an ownership change from a thread context. This issues - * a mdmn_ksend_message() and then completes the appropriate ownership change - * on successful completion of the message transport. - * The originating application must poll for completion on the 'flags' member - * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block. - * Success is marked by a return value of MD_MN_MM_RES_OK, Failure by - * MD_MN_MM_RES_FAIL - */ -static void -mirror_owner_thread(md_mn_req_owner_t *ownp) -{ - int rval; - set_t setno = MD_MIN2SET(ownp->mnum); - mm_unit_t *un = MD_UNIT(ownp->mnum); - md_mn_kresult_t *kresult; - md_mps_t *ps1; - - un->un_mirror_owner_status = 0; - - mutex_enter(&un->un_owner_mx); - un->un_owner_state |= MM_MN_OWNER_SENT; - mutex_exit(&un->un_owner_mx); - - kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER, - MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t), - kresult); - - if (!MDMN_KSEND_MSG_OK(rval, kresult)) { - /* - * Message transport layer failed. Return the failure code to - * the application. - */ - mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER"); - mutex_enter(&un->un_owner_mx); - un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT); - mutex_exit(&un->un_owner_mx); - un->un_mirror_owner_status = - MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL; - } else { - /* - * Ownership change succeeded. Update in-core version of - * mirror owner. - */ - mutex_enter(&un->un_owner_mx); - if (un->un_owner_state & MM_MN_BECOME_OWNER) { - un->un_mirror_owner = md_mn_mynode_id; - /* Sets node owner of un_rr_dirty record */ - if (un->un_rr_dirty_recid) - (void) mddb_setowner(un->un_rr_dirty_recid, - md_mn_mynode_id); - /* - * Release the block on the current resync region if it - * is blocked - */ - ps1 = un->un_rs_prev_overlap; - if ((ps1 != NULL) && - (ps1->ps_flags & MD_MPS_ON_OVERLAP)) - mirror_overlap_tree_remove(ps1); - } - - un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER); - mutex_exit(&un->un_owner_mx); - un->un_mirror_owner_status = - MD_MN_MM_RESULT | MD_MN_MM_RES_OK; - - /* Restart the resync thread if it was previously blocked */ - if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) { - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER; - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - } - } - kmem_free(kresult, sizeof (md_mn_kresult_t)); - kmem_free(ownp, sizeof (md_mn_req_owner_t)); - thread_exit(); -} - -/* - * mirror_set_owner: - * ---------------- - * Called to change the owner of a mirror to the specified node. If we - * are not the owner of the mirror, we do nothing apart from update the in-core - * ownership. It can also be used to choose a new owner for the resync of a - * mirror, this case is specified by the flag MD_MN_MM_CHOOSE_OWNER, see below. - * - * The p->d.flags bitfield controls how subsequent ownership changes will be - * handled: - * MD_MN_MM_SPAWN_THREAD - * a separate thread is created which emulates the behaviour of - * become_owner() [mirror.c]. This is needed when changing the - * ownership from user context as there needs to be a controlling - * kernel thread which updates the owner info on the originating - * node. Successful completion of the mdmn_ksend_message() means - * that the owner field can be changed. - * - * MD_MN_MM_PREVENT_CHANGE - * Disallow any change of ownership once this ownership change has - * been processed. The only way of changing the owner away from - * the p->d.owner node specified in the call is to issue a request - * with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to - * become owner from a different node while the PREVENT_CHANGE - * is in operation will result in an EAGAIN return value. - * un->un_owner_state has MM_MN_PREVENT_CHANGE set. - * - * MD_MN_MM_ALLOW_CHANGE - * Allow the owner to be changed by a subsequent request. - * un->un_owner_state has MM_MN_PREVENT_CHANGE cleared. - * - * MD_MN_MM_CHOOSE_OWNER - * Choose a new owner for a mirror resync. In this case, the new - * owner argument is not used. The selection of a new owner - * is a round robin allocation using a resync owner count. This - * ioctl passes this value in a message to the master node - * which uses it to select a node from the node list and then - * sends it a message to become the owner. - * - * If we are the current owner, we must stop further i/o from being scheduled - * and wait for any pending i/o to drain. We wait for any in-progress resync - * bitmap updates to complete and we can then set the owner. If an update to - * the resync bitmap is attempted after this we simply don't write this out to - * disk until the ownership is restored. - * - * If we are the node that wants to become the owner we update the in-core - * owner and return. The i/o that initiated the ownership change will complete - * on successful return from this ioctl. - * - * Return Value: - * 0 Success - * EINVAL Invalid unit referenced - * EAGAIN Ownership couldn't be transferred away or change of - * ownership is prevented. Caller should retry later on. - */ -static int -mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock) -{ - mdi_unit_t *ui; - mm_unit_t *un; - set_t setno; - - if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL) - return (EINVAL); - ui = MDI_UNIT(p->d.mnum); - setno = MD_MIN2SET(p->d.mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - /* - * If we are choosing a new resync owner, send a message to the master - * to make the choice. - */ - if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) { - /* Release ioctl lock before we call ksend_message() */ - md_ioctl_readerexit(lock); - /* If we're resetting the owner pass the node id in */ - if (p->d.owner != MD_MN_MIRROR_UNOWNED) { - return (mirror_choose_owner(un, &p->d)); - } else { - return (mirror_choose_owner(un, NULL)); - } - } - - /* - * Check for whether we have to spawn a thread to issue this request. - * If set we issue a mdmn_ksend_message() to cause the appropriate - * ownership change. On completion of this request the calling - * application _must_ poll the structure 'flags' field to determine the - * result of the request. All this is necessary until we have true - * multi-entrant ioctl support. - * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can - * be ignored. - */ - if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) { - md_mn_req_owner_t *ownp; - ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP); - p->d.flags &= ~MD_MN_MM_SPAWN_THREAD; - bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t)); - if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp, - 0, &p0, TS_RUN, 60) == NULL) { - kmem_free(ownp, sizeof (md_mn_req_owner_t)); - return (EFAULT); - } else { - return (0); - } - } - - /* - * If setting owner to NULL, this is being done because the owner has - * died and therefore we set OPT_NOT_DONE to ensure that the - * mirror is marked as "Needs Maintenance" and that an optimized - * resync will be done when we resync the mirror, Also clear the - * PREVENT_CHANGE flag and remove the last resync region from the - * overlap tree. - */ - if (p->d.owner == 0) { - md_mps_t *ps; - int i; - - md_ioctl_readerexit(lock); - un = md_ioctl_writerlock(lock, ui); - /* - * If the ABR capability is not set and the pass_num is non-zero - * there is need to perform an optimized resync - * Therefore set OPT_NOT_DONE, setup the resync_bm and set - * the submirrors as resync targets. - */ - if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) { - MD_STATUS(un) |= MD_UN_OPT_NOT_DONE; - - (void) mddb_reread_rr(setno, un->un_rr_dirty_recid); - bcopy((caddr_t)un->un_dirty_bm, - (caddr_t)un->un_resync_bm, - howmany(un->un_rrd_num, NBBY)); - for (i = 0; i < NMIRROR; i++) { - if ((SUBMIRROR_IS_READABLE(un, i)) || - SMS_BY_INDEX_IS(un, i, - SMS_OFFLINE_RESYNC)) - un->un_sm[i].sm_flags |= - MD_SM_RESYNC_TARGET; - } - } - mutex_enter(&un->un_owner_mx); - un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE; - mutex_exit(&un->un_owner_mx); - ps = un->un_rs_prev_overlap; - if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) { - mirror_overlap_tree_remove(ps); - ps->ps_firstblk = 0; - ps->ps_lastblk = 0; - } - md_ioctl_writerexit(lock); - un = md_ioctl_readerlock(lock, ui); - } - - mutex_enter(&un->un_owner_mx); - if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) { - /* - * If we are not trying to become owner ourselves check - * to see if we have to change the owner - */ - if (un->un_mirror_owner == p->d.owner) { - /* - * No need to change owner, - * Clear/set PREVENT_CHANGE bit - */ - if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) { - un->un_owner_state |= MM_MN_PREVENT_CHANGE; - } else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) { - un->un_owner_state &= ~MM_MN_PREVENT_CHANGE; - } - mutex_exit(&un->un_owner_mx); - return (0); - } - } - - /* - * Disallow ownership change if previously requested to. This can only - * be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the - * flags field. - */ - if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) && - !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) { - mutex_exit(&un->un_owner_mx); -#ifdef DEBUG - cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become " - "owner while node %x has exclusive access to %s", - p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un))); -#endif - return (EAGAIN); - } - if (p->d.owner == md_mn_mynode_id) { - /* - * I'm becoming the mirror owner. Flag this so that the - * message sender can change the in-core owner when all - * nodes have processed this message - */ - un->un_owner_state &= ~MM_MN_OWNER_SENT; - un->un_owner_state |= MM_MN_BECOME_OWNER; - un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? - MM_MN_PREVENT_CHANGE : 0; - un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? - ~MM_MN_PREVENT_CHANGE : ~0; - - mutex_exit(&un->un_owner_mx); - } else if ((un->un_mirror_owner == md_mn_mynode_id) || - un->un_owner_state & MM_MN_BECOME_OWNER) { - mutex_exit(&un->un_owner_mx); - - /* - * I'm releasing ownership. Block and drain i/o. This also - * blocks until any in-progress resync record update completes. - */ - md_ioctl_readerexit(lock); - un = md_ioctl_writerlock(lock, ui); - /* Block the resync thread */ - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER; - mutex_exit(&un->un_rs_thread_mx); - mutex_enter(&un->un_owner_mx); - un->un_mirror_owner = p->d.owner; - - /* Sets node owner of un_rr_dirty record */ - if (un->un_rr_dirty_recid) - (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner); - un->un_owner_state &= ~MM_MN_BECOME_OWNER; - un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? - MM_MN_PREVENT_CHANGE : 0; - un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? - ~MM_MN_PREVENT_CHANGE : ~0; - mutex_exit(&un->un_owner_mx); - /* - * Allow further i/o to occur. Any write() from another node - * will now cause another ownership change to occur. - */ - md_ioctl_writerexit(lock); - } else { - /* Update the in-core mirror owner */ - un->un_mirror_owner = p->d.owner; - /* Sets node owner of un_rr_dirty record */ - if (un->un_rr_dirty_recid) - (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner); - un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? - MM_MN_PREVENT_CHANGE : 0; - un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? - ~MM_MN_PREVENT_CHANGE : ~0; - mutex_exit(&un->un_owner_mx); - } - return (0); -} -/* - * mirror_allocate_hotspare: - * ------------------------ - * Called to allocate a hotspare for a failed component. This function is - * called by the MD_MN_ALLOCATE_HOTSPARE ioctl. - */ -static int -mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp) -{ - set_t setno; - mm_unit_t *un; - -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n", - p->mnum, p->sm, p->comp); -#endif - - if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL) - return (EINVAL); - - /* This function is only valid for a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - (void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT, - p->hs_id, lockp); - md_ioctl_writerexit(lockp); - return (0); -} - -/* - * mirror_get_owner_status: - * ----------------------- - * Return the status of a previously issued ioctl to change ownership. This is - * required for soft-partition support as the request to change mirror owner - * needs to be run from a separate daemon thread. - * - * Returns: - * 0 Success (contents of un_mirror_owner_status placed in 'flags') - * EINVAL Invalid unit - */ -static int -mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock) -{ - mm_unit_t *un; - set_t setno; - - if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL) - return (EINVAL); - - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - p->flags = un->un_mirror_owner_status; - return (0); -} - -/* - * mirror_set_state: - * --------------- - * Called to set the state of the component of a submirror to the specified - * value. This function is called by the MD_MN_SET_STATE ioctl. - */ -static int -mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp) -{ - mm_unit_t *un; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - set_t setno; - -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, " - "%x, %x, %x %x\n", p->mnum, p->sm, p->comp, - p->state, p->hs_id); -#endif - if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL) - return (EINVAL); - - /* This function is only valid for a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - sm = &un->un_sm[p->sm]; - smic = &un->un_smic[p->sm]; - - /* Set state in component and update ms_flags */ - shared = (md_m_shared_t *) - (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp); - /* - * If a CS_ERRED state is being sent, verify that the sender - * has the same view of the component that this node currently has. - * - * There is a case where the sender was sending a CS_ERRED when a - * component was in error, but before the sender returns from - * ksend_message the component has been hotspared and resync'd. - * - * In this case, the hs_id will be different from the shared ms_hs_id, - * so the component has already been hotspared. Just return in this - * case. - */ - if (p->state == CS_ERRED) { - if (shared->ms_hs_id != p->hs_id) { -#ifdef DEBUG - if (mirror_debug_flag) { - printf("mirror_set_state: short circuit " - "hs_id=0x%x, ms_hs_id=0x%x\n", - p->hs_id, shared->ms_hs_id); - } -#endif - /* release the block on writes to the mirror */ - mirror_resume_writes(un); - md_ioctl_writerexit(lockp); - return (0); - } - } - - /* - * If the device is newly errored then make sure that it is - * closed. Closing the device allows for the RCM framework - * to unconfigure the device if required. - */ - if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) && - (shared->ms_flags & MDM_S_ISOPEN)) { - void (*get_dev)(); - ms_cd_info_t cd; - - get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, - "get device", 0); - (void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd); - - md_layered_close(cd.cd_dev, MD_OFLG_NULL); - shared->ms_flags &= ~MDM_S_ISOPEN; - } - - shared->ms_state = p->state; - uniqtime32(&shared->ms_timestamp); - - if (p->state == CS_ERRED) { - shared->ms_flags |= MDM_S_NOWRITE; - } else - shared->ms_flags &= ~MDM_S_NOWRITE; - - shared->ms_flags &= ~MDM_S_IOERR; - un->un_changecnt++; - shared->ms_lasterrcnt = un->un_changecnt; - - /* Update state in submirror */ - mirror_set_sm_state(sm, smic, SMS_RUNNING, 0); - /* - * Commit the state change to the metadb, only the master will write - * to disk - */ - mirror_commit(un, SMI2BIT(p->sm), 0); - - /* release the block on writes to the mirror */ - mirror_resume_writes(un); - - /* generate NOTIFY events for error state changes */ - if (p->state == CS_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } else if (p->state == CS_LAST_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - md_ioctl_writerexit(lockp); - return (0); -} - -/* - * mirror_suspend_writes: - * --------------------- - * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is - * tested in mirror_write_strategy, and if set all writes are blocked. - * This function is called by the MD_MN_SUSPEND_WRITES ioctl. - */ -static int -mirror_suspend_writes(md_suspend_wr_params_t *p) -{ - set_t setno; - mm_unit_t *un; - -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_suspend_writes: mnum = %x\n", p->mnum); -#endif - if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL) - return (EINVAL); /* No unit */ - - /* This function is only valid for a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - /* - * Mark the resync as blocked. This will stop any currently running - * thread and will prevent a new resync from attempting to perform - * i/o - */ - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_BLOCK; - mutex_exit(&un->un_rs_thread_mx); - - mutex_enter(&un->un_suspend_wr_mx); - un->un_suspend_wr_flag = 1; - mutex_exit(&un->un_suspend_wr_mx); - - return (0); -} - -/* - * mirror_set_capability: - * ------------------------ - * Called to set or clear a capability for a mirror - * called by the MD_MN_SET_CAP ioctl. - */ -static int -mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp) -{ - set_t setno; - mm_unit_t *un; - mdi_unit_t *ui; - -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_set_capability: mnum = %x\n", p->mnum); -#endif - if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL) - return (EINVAL); - - /* This function is only valid for a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - ui = MDI_UNIT(p->mnum); - - if (p->sc_set & DKV_ABR_CAP) { - ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */ - /* Clear DRL and set owner to 0 if no resync active */ - mirror_process_unit_resync(un); - if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) { - mutex_enter(&un->un_owner_mx); - un->un_mirror_owner = 0; - mutex_exit(&un->un_owner_mx); - } - } else { - ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */ - } - if (p->sc_set & DKV_DMR_CAP) { - ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */ - } else { - ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */ - } - return (0); -} - -/* - * mirror_choose_owner: - * ------------------------ - * Called to choose an owner for a mirror resync. Can be called when starting - * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag - * set. The ioctl is called with this flag set when we are in the cluster - * reconfig and we wish to set a new owner for a resync whose owner has left - * the cluster. We use a resync owner count to implement a round robin - * allocation of resync owners. We send a message to the master including - * this count and the message handler uses it to select an owner from the - * nodelist and then sends a SET_MM_OWNER message to the chosen node to - * become the owner. - * - * Input: - * un - unit reference - * ownp - owner information (if non-NULL) - */ -int -mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp) -{ - set_t setno; - md_mn_msg_chooseid_t *msg; - - /* This function is only valid for a multi-node set */ - setno = MD_UN2SET(un); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - -#ifdef DEBUG - if (mirror_debug_flag) - printf("send choose owner message, mnum = %x," - "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt); -#endif - - /* - * setup message with current resync count - * and then increment the count. If we're called with a non-NULL - * owner then we are reestablishing the owner of the mirror. In this - * case we have to flag this to the message handler and set rcnt to - * the new owner node. - */ - msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP); - msg->msg_chooseid_mnum = MD_SID(un); - if (ownp == NULL) { - mutex_enter(&md_mx); - msg->msg_chooseid_rcnt = md_set[setno].s_rcnt; - md_set[setno].s_rcnt++; - mutex_exit(&md_mx); - msg->msg_chooseid_set_node = B_FALSE; - } else { - msg->msg_chooseid_rcnt = ownp->owner; - msg->msg_chooseid_set_node = B_TRUE; - } - - /* - * Spawn a thread to issue the ksend_message() call so that we can - * drop the ioctl lock hierarchy that is blocking further rpc.metad and - * commd set ownership checking. - */ - if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg, - 0, &p0, TS_RUN, 60) == NULL) { - kmem_free(msg, sizeof (md_mn_msg_chooseid_t)); - return (EFAULT); - } else { - return (0); - } -} - -/* - * mirror_get_status: - * ---------------------------------- - * Called by nodes which are not the master node of the cluster. Obtains the - * master abr state and the submirror status for each valid submirror of the - * unit so that the status returned by metastat is consistent across the - * cluster. - * We update tstate for the mirror and both the sm_flag and the sm_state for - * each submirror. - * - * Input: - * un mirror to obtain status from - * - * Calling Convention: - * writerlock (either ioctl or unit) must be held - */ -void -mirror_get_status(mm_unit_t *un, IOLOCK *lockp) -{ - mm_submirror_t *sm; - int smi; - int rval; - md_mn_kresult_t *kres; - md_mn_msg_mir_state_t msg; - md_mn_msg_mir_state_res_t *res; - set_t setno = MD_UN2SET(un); - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - - - ASSERT(ui->ui_lock & MD_UL_WRITER); - - /* - * Get all of the information for the mirror. - */ - bzero(&msg, sizeof (msg)); - msg.mir_state_mnum = MD_SID(un); - - /* - * Must drop the writerlock over ksend_message since another - * thread on this node could be running a higher class message - * and be trying grab the readerlock. - * - * If we are in the context of an ioctl, drop the ioctl lock. - * lockp holds the list of locks held. - */ - if (lockp) { - IOLOCK_RETURN_RELEASE(0, lockp); - } else { - md_unit_writerexit(ui); - } - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE, - MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg, - sizeof (msg), kres); - - /* if the node hasn't yet joined, it's Ok. */ - if ((!MDMN_KSEND_MSG_OK(rval, kres)) && - (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) { - mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE"); - cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE"); - } - - /* if dropped the lock previously, regain it */ - if (lockp) { - IOLOCK_RETURN_REACQUIRE(lockp); - } else { - /* - * Reacquire dropped locks and update acquirecnts - * appropriately. - */ - (void) md_unit_writerlock(ui); - } - - /* - * Check to see if we've got a believable amount of returned data. - * If not, we simply return as there is no usable information. - */ - if (kres->kmmr_res_size < sizeof (*res)) { - cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected" - " %d\n", kres->kmmr_res_size, (int)sizeof (*res)); - kmem_free(kres, sizeof (md_mn_kresult_t)); - return; - } - - /* - * Copy the results from the call back into our sm_state/sm_flags - */ - res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data; -#ifdef DEBUG - if (mirror_debug_flag) - printf("mirror_get_status: %s\n", md_shortname(MD_SID(un))); -#endif - for (smi = 0; smi < NMIRROR; smi++) { - sm = &un->un_sm[smi]; -#ifdef DEBUG - if (mirror_debug_flag) { - printf("curr state %4x, new state %4x\n", sm->sm_state, - res->sm_state[smi]); - printf("curr_flags %4x, new flags %4x\n", sm->sm_flags, - res->sm_flags[smi]); - } -#endif - sm->sm_state = res->sm_state[smi]; - sm->sm_flags = res->sm_flags[smi]; - } - - /* Set ABR if set on the Master node */ - ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP); - - kmem_free(kres, sizeof (md_mn_kresult_t)); -} - -/* - * mirror_get_mir_state: - * ------------------- - * Obtain the ABR state of a mirror and the state of all submirrors from the - * master node for the unit specified in sm_state->mnum. - * Called by MD_MN_GET_MIRROR_STATE ioctl. - */ -static int -mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp) -{ - mm_unit_t *un; - set_t setno; - md_error_t mde; - - mdclrerror(&mde); - - if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) { - return (EINVAL); - } - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - - /* - * We've now got a writerlock on the unit structure (so no-one can - * modify the incore values) and we'll now send the message to the - * master node. Since we're only called as part of a reconfig cycle - * we don't need to release the unit locks across the ksend_message as - * only the master node will process it, and we never send this to - * ourselves if we're the master. - */ - - mirror_get_status(un, lockp); - - return (0); -} - -static int -mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - /* dispatch ioctl */ - switch (cmd) { - - case MD_IOCSET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_params_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_set(d, mode); - break; - } - - case MD_IOCGET: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_get(d, mode, lockp); - break; - } - - case MD_IOCRESET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_reset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_reset((md_i_reset_t *)d); - break; - } - - case MD_IOCSETSYNC: - case MD_MN_SETSYNC: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_resync_ioctl_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp); - break; - } - - case MD_IOCGETSYNC: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_resync_ioctl_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_get_resync((md_resync_ioctl_t *)d); - break; - } - - case MD_IOCREPLACE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (replace_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = comp_replace((replace_params_t *)d, lockp); - break; - } - - case MD_IOCOFFLINE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_off_on_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_offline((md_i_off_on_t *)d, lockp); - break; - } - - case MD_IOCONLINE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_off_on_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_online((md_i_off_on_t *)d, lockp); - break; - } - - case MD_IOCDETACH: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_detach_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_detach((md_detach_params_t *)d, lockp); - break; - } - - case MD_IOCATTACH: - { - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_att_struct_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_attach((md_att_struct_t *)d, lockp); - break; - } - - case MD_IOCGET_DEVS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_getdevs_params_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_getdevs(d, mode, lockp); - break; - } - - case MD_IOCGROW: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_grow_params_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_grow(d, lockp); - break; - } - - case MD_IOCCHANGE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_mirror_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_change((md_mirror_params_t *)d, lockp); - break; - } - - case MD_IOCPROBE_DEV: - { - md_probedev_impl_t *p = NULL; - md_probedev_t *ph = NULL; - daemon_queue_t *hdr = NULL; - int i; - size_t sz2 = 0; - - if (! (mode & FREAD)) - return (EACCES); - - - sz = sizeof (md_probedev_t); - d = kmem_alloc(sz, KM_SLEEP); - - /* now copy in the data */ - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - goto free_mem; - } - - /* - * Sanity test the args. Test name should have the keyword - * probe. - */ - - p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP); - - p->probe_sema = NULL; - p->probe_mx = NULL; - p->probe.mnum_list = (uint64_t)NULL; - - ph = (struct md_probedev *)d; - - p->probe.nmdevs = ph->nmdevs; - (void) strcpy(p->probe.test_name, ph->test_name); - bcopy(&ph->md_driver, &(p->probe.md_driver), - sizeof (md_driver_t)); - - if ((p->probe.nmdevs < 1) || - (strstr(p->probe.test_name, "probe") == NULL)) { - err = EINVAL; - goto free_mem; - } - - - sz2 = sizeof (minor_t) * p->probe.nmdevs; - p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2, - KM_SLEEP); - - if (ddi_copyin((void *)(uintptr_t)ph->mnum_list, - (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) { - err = EFAULT; - goto free_mem; - } - - if (err = md_init_probereq(p, &hdr)) - goto free_mem; - - /* - * put the request on the queue and wait. - */ - - daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW); - - (void) IOLOCK_RETURN(0, lockp); - /* wait for the events to occur */ - for (i = 0; i < p->probe.nmdevs; i++) { - sema_p(PROBE_SEMA(p)); - } - while (md_ioctl_lock_enter() == EINTR) - ; - - /* - * clean up. The hdr list is freed in the probe routines - * since the list is NULL by the time we get here. - */ -free_mem: - if (p) { - if (p->probe_sema != NULL) { - sema_destroy(PROBE_SEMA(p)); - kmem_free(p->probe_sema, sizeof (ksema_t)); - } - if (p->probe_mx != NULL) { - mutex_destroy(PROBE_MX(p)); - kmem_free(p->probe_mx, sizeof (kmutex_t)); - } - if ((uintptr_t)p->probe.mnum_list) - kmem_free((void *)(uintptr_t) - p->probe.mnum_list, sz2); - - kmem_free(p, sizeof (md_probedev_impl_t)); - } - break; - } - - case MD_MN_SET_MM_OWNER: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_mmown_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mirror_set_owner((md_set_mmown_params_t *)d, lockp); - break; - } - - case MD_MN_GET_MM_OWNER: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_set_mmown_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mirror_get_owner((md_set_mmown_params_t *)d, lockp); - break; - } - - case MD_MN_MM_OWNER_STATUS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_mn_own_status_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp); - break; - } - - case MD_MN_SET_STATE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_state_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_set_state((md_set_state_params_t *)d, lockp); - break; - } - - case MD_MN_SUSPEND_WRITES: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_suspend_wr_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mirror_suspend_writes((md_suspend_wr_params_t *)d); - break; - } - - case MD_MN_RESYNC: - { - sz = sizeof (md_mn_rs_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode) != 0) { - err = EFAULT; - break; - } - - err = mirror_resync_message((md_mn_rs_params_t *)d, lockp); - break; - } - - case MD_MN_ALLOCATE_HOTSPARE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_alloc_hotsp_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d, - lockp); - break; - } - - case MD_MN_POKE_HOTSPARES: - { - (void) poke_hotspares(); - break; - } - - case MD_MN_SET_CAP: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_mn_setcap_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_set_capability((md_mn_setcap_params_t *)d, - lockp); - break; - } - - case MD_MN_GET_MIRROR_STATE: - { - sz = sizeof (md_mn_get_mir_state_t); - d = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_get_mir_state((md_mn_get_mir_state_t *)d, - lockp); - break; - } - - case MD_MN_RR_DIRTY: - { - sz = sizeof (md_mn_rr_dirty_params_t); - d = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d); - break; - } - - case MD_MN_RR_CLEAN: - { - md_mn_rr_clean_params_t tmp; - - /* get the first part of the structure to find the size */ - if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) { - err = EFAULT; - break; - } - - sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp); - d = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d); - break; - } - - default: - return (ENOTTY); - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - kmem_free(d, sz); - } - return (err); -} - -int -md_mirror_ioctl( - dev_t ddi_dev, - int cmd, - void *data, - int mode, - IOLOCK *lockp -) -{ - minor_t mnum = getminor(ddi_dev); - mm_unit_t *un; - int err = 0; - - /* handle admin ioctls */ - if (mnum == MD_ADM_MINOR) - return (mirror_admin_ioctl(cmd, data, mode, lockp)); - - /* check unit */ - if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((un = MD_UNIT(mnum)) == NULL)) - return (ENXIO); - /* is this a supported ioctl? */ - err = md_check_ioctl_against_unit(cmd, un->c); - if (err != 0) { - return (err); - } - - /* dispatch ioctl */ - switch (cmd) { - - case DKIOCINFO: - { - struct dk_cinfo *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - get_info(p, mnum); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) - err = EFAULT; - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGMEDIAINFO: - { - struct dk_minfo p; - - if (! (mode & FREAD)) - return (EACCES); - - get_minfo(&p, mnum); - if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) - err = EFAULT; - - return (err); - } - - case DKIOCGGEOM: - { - struct dk_geom *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - if ((err = mirror_get_geom(un, p)) == 0) { - if (ddi_copyout((caddr_t)p, data, sizeof (*p), - mode) != 0) - err = EFAULT; - } - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FREAD)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - - if ((err = mirror_get_vtoc(un, vtoc)) != 0) { - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - vtoctovtoc32((*vtoc), (*vtoc32)); - if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) - err = EFAULT; - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCSVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { - err = EFAULT; - } - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { - err = EFAULT; - } else { - vtoc32tovtoc((*vtoc32), (*vtoc)); - } - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - if (err == 0) - err = mirror_set_vtoc(un, vtoc); - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCGEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FREAD)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - - if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) { - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) - err = EFAULT; - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCSEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - - if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { - err = EFAULT; - } - - if (err == 0) - err = mirror_set_extvtoc(un, extvtoc); - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCGAPART: - { - struct dk_map dmp; - - if ((err = mirror_get_cgapart(un, &dmp)) != 0) { - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), - mode) != 0) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct dk_map32 dmp32; - - dmp32.dkl_cylno = dmp.dkl_cylno; - dmp32.dkl_nblk = dmp.dkl_nblk; - - if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), - mode) != 0) - err = EFAULT; - } -#endif /* _SYSCALL32 */ - - return (err); - } - case DKIOCGETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocgetefi(mnum, data, mode)); - } - case DKIOCSETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocsetefi(mnum, data, mode)); - } - case DKIOCPARTITION: - { - return (md_dkiocpartition(mnum, data, mode)); - } - - case DKIOCGETVOLCAP: - { - volcap_t vc; - mdi_unit_t *ui; - - /* Only valid for MN sets */ - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - ui = MDI_UNIT(mnum); - if (! (mode & FREAD)) - return (EACCES); - - vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP; - vc.vc_set = 0; - if (ui->ui_tstate & MD_ABR_CAP) { - vc.vc_set |= DKV_ABR_CAP; - } - if (ddi_copyout(&vc, data, sizeof (volcap_t), mode)) - err = EFAULT; - return (err); - } - - case DKIOCSETVOLCAP: - { - volcap_t vc; - volcapset_t volcap = 0; - mdi_unit_t *ui; - - /* Only valid for MN sets */ - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - ui = MDI_UNIT(mnum); - if (! (mode & FWRITE)) - return (EACCES); - - if (ddi_copyin(data, &vc, sizeof (volcap_t), mode)) - return (EFAULT); - - /* Not valid if a submirror is offline */ - if (un->c.un_status & MD_UN_OFFLINE_SM) { - return (EINVAL); - } - if (ui->ui_tstate & MD_ABR_CAP) - volcap |= DKV_ABR_CAP; - /* Only send capability message if there is a change */ - if ((vc.vc_set & (DKV_ABR_CAP)) != volcap) - err = mdmn_send_capability_message(mnum, vc, lockp); - return (err); - } - - case DKIOCDMR: - { - vol_directed_rd_t *vdr; - -#ifdef _MULTI_DATAMODEL - vol_directed_rd32_t *vdr32; -#endif /* _MULTI_DATAMODEL */ - - /* Only valid for MN sets */ - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP); - if (vdr == NULL) - return (ENOMEM); - -#ifdef _MULTI_DATAMODEL - vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP); - if (vdr32 == NULL) { - kmem_free(vdr, sizeof (vol_directed_rd_t)); - return (ENOMEM); - } - - switch (ddi_model_convert_from(mode & FMODELS)) { - case DDI_MODEL_ILP32: - /* - * If we're called from a higher-level driver we don't - * need to manipulate the data. Its already been done by - * the caller. - */ - if (!(mode & FKIOCTL)) { - if (ddi_copyin(data, vdr32, sizeof (*vdr32), - mode)) { - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } - vdr->vdr_flags = vdr32->vdr_flags; - vdr->vdr_offset = vdr32->vdr_offset; - vdr->vdr_nbytes = vdr32->vdr_nbytes; - vdr->vdr_data = - (void *)(uintptr_t)vdr32->vdr_data; - vdr->vdr_side = vdr32->vdr_side; - break; - } - /* FALLTHROUGH */ - - case DDI_MODEL_NONE: - if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { - kmem_free(vdr32, sizeof (*vdr32)); - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } - break; - - default: - kmem_free(vdr32, sizeof (*vdr32)); - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } -#else /* ! _MULTI_DATAMODEL */ - if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } -#endif /* _MULTI_DATAMODEL */ - - err = mirror_directed_read(ddi_dev, vdr, mode); - - if (err == 0) { -#ifdef _MULTI_DATAMODEL - switch (ddi_model_convert_from(mode & FMODELS)) { - case DDI_MODEL_ILP32: - if (!(mode & FKIOCTL)) { - vdr32->vdr_flags = vdr->vdr_flags; - vdr32->vdr_offset = vdr->vdr_offset; - vdr32->vdr_side = vdr->vdr_side; - vdr32->vdr_bytesread = - vdr->vdr_bytesread; - bcopy(vdr->vdr_side_name, - vdr32->vdr_side_name, - sizeof (vdr32->vdr_side_name)); - - if (ddi_copyout(vdr32, data, - sizeof (*vdr32), mode)) { - err = EFAULT; - } - break; - } - /* FALLTHROUGH */ - - case DDI_MODEL_NONE: - if (ddi_copyout(vdr, data, sizeof (*vdr), mode)) - err = EFAULT; - break; - } -#else /* ! _MULTI_DATAMODEL */ - if (ddi_copyout(vdr, data, sizeof (*vdr), mode)) - err = EFAULT; -#endif /* _MULTI_DATAMODEL */ - if (vdr->vdr_flags & DKV_DMR_ERROR) - err = EIO; - } - -#ifdef _MULTI_DATAMODEL - kmem_free(vdr32, sizeof (*vdr32)); -#endif /* _MULTI_DATAMODEL */ - - kmem_free(vdr, sizeof (*vdr)); - - return (err); - } - - default: - return (ENOTTY); - } -} - -/* - * rename named service entry points and support functions - */ - -/* - * rename/exchange role swap functions - * - * most of these are handled by generic role swap functions - */ - -/* - * MDRNM_UPDATE_KIDS - * rename/exchange of our child or grandchild - */ -void -mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp) -{ - mm_submirror_t *sm; - int smi; - - ASSERT(rtxnp); - ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE)); - ASSERT(rtxnp->recids); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->old_role == MDRR_PARENT); - ASSERT(delta->new_role == MDRR_PARENT); - - /* - * since our role isn't changing (parent->parent) - * one of our children must be changing - * find the child being modified, and update - * our notion of it - */ - for (smi = 0; smi < NMIRROR; smi++) { - mm_unit_t *un = (mm_unit_t *)delta->unp; - - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - continue; - } - sm = &un->un_sm[smi]; - - if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) { - sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum); - sm->sm_key = rtxnp->to.key; - break; - } - } - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * exchange down (self->child) - */ -void -mirror_exchange_self_update_from_down( - md_rendelta_t *delta, - md_rentxn_t *rtxnp -) -{ - int smi; - mm_submirror_t *found; - minor_t from_min, to_min; - sv_dev_t sv; - - ASSERT(rtxnp); - ASSERT(MDRNOP_EXCHANGE == rtxnp->op); - ASSERT(rtxnp->recids); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(delta->old_role == MDRR_SELF); - ASSERT(delta->new_role == MDRR_CHILD); - ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - /* - * self id changes in our own unit struct - */ - - MD_SID(delta->unp) = to_min; - - /* - * parent identifier need not change - */ - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: the other half of this transfer is done in the "update_to" - * exchange named service. - */ - - MDI_VOIDUNIT(to_min) = delta->uip; - MD_VOIDUNIT(to_min) = delta->unp; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->to.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = to_min; - - /* - * find the child whose identity we're assuming - */ - - for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) { - mm_submirror_t *sm; - mm_unit_t *un = (mm_unit_t *)delta->unp; - - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - continue; - } - sm = &un->un_sm[smi]; - - if (md_getminor(sm->sm_dev) == to_min) { - found = sm; - } - } - ASSERT(found); - - /* - * Update the sub-mirror's identity - */ - found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum); - sv.key = found->sm_key; - - ASSERT(rtxnp->from.key != MD_KEYWILD); - ASSERT(rtxnp->from.key != MD_KEYBAD); - - found->sm_key = rtxnp->from.key; - - /* - * delete the key for the old sub-mirror from the name space - */ - - sv.setno = MD_MIN2SET(from_min); - md_rem_names(&sv, 1); - - /* - * and store the record id (from the unit struct) into recids - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * exchange down (parent->self) - */ -void -mirror_exchange_parent_update_to( - md_rendelta_t *delta, - md_rentxn_t *rtxnp -) -{ - int smi; - mm_submirror_t *found; - minor_t from_min, to_min; - sv_dev_t sv; - - ASSERT(rtxnp); - ASSERT(MDRNOP_EXCHANGE == rtxnp->op); - ASSERT(rtxnp->recids); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(delta->old_role == MDRR_PARENT); - ASSERT(delta->new_role == MDRR_SELF); - ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - /* - * self id changes in our own unit struct - */ - - MD_SID(delta->unp) = from_min; - - /* - * parent identifier need not change - */ - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: the other half of this transfer is done in the "update_to" - * exchange named service. - */ - - MDI_VOIDUNIT(from_min) = delta->uip; - MD_VOIDUNIT(from_min) = delta->unp; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->from.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = from_min; - - /* - * find the child whose identity we're assuming - */ - - for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) { - mm_submirror_t *sm; - mm_unit_t *un = (mm_unit_t *)delta->unp; - - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - continue; - } - sm = &un->un_sm[smi]; - - if (md_getminor(sm->sm_dev) == from_min) { - found = sm; - } - } - ASSERT(found); - - /* - * Update the sub-mirror's identity - */ - found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum); - sv.key = found->sm_key; - - ASSERT(rtxnp->to.key != MD_KEYWILD); - ASSERT(rtxnp->to.key != MD_KEYBAD); - - found->sm_key = rtxnp->to.key; - - /* - * delete the key for the old sub-mirror from the name space - */ - - sv.setno = MD_MIN2SET(to_min); - md_rem_names(&sv, 1); - - /* - * and store the record id (from the unit struct) into recids - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * MDRNM_LIST_URKIDS: named svc entry point - * all all delta entries appropriate for our children onto the - * deltalist pointd to by dlpp - */ -int -mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min; - mm_unit_t *from_un; - md_rendelta_t *new, *p; - int smi; - int n_children; - mm_submirror_t *sm; - - ASSERT(rtxnp); - ASSERT(dlpp); - ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - n_children = 0; - - if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); - return (-1); - } - - for (p = *dlpp; p && p->next != NULL; p = p->next) { - /* NULL */ - } - - for (smi = 0; smi < NMIRROR; smi++) { - minor_t child_min; - - if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) { - continue; - } - - sm = &from_un->un_sm[smi]; - child_min = md_getminor(sm->sm_dev); - - p = new = md_build_rendelta(MDRR_CHILD, - to_min == child_min? MDRR_SELF: MDRR_CHILD, - sm->sm_dev, p, - MD_UNIT(child_min), MDI_UNIT(child_min), - &rtxnp->mde); - - if (!new) { - if (mdisok(&rtxnp->mde)) { - (void) mdsyserror(&rtxnp->mde, ENOMEM); - } - return (-1); - } - ++n_children; - } - - return (n_children); -} - -/* - * support routine for MDRNM_CHECK - */ -static int -mirror_may_renexch_self( - mm_unit_t *un, - mdi_unit_t *ui, - md_rentxn_t *rtxnp) -{ - minor_t from_min; - minor_t to_min; - bool_t toplevel; - bool_t related; - int smi; - mm_submirror_t *sm; - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - if (!un || !ui) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD); - if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - if (MD_PARENT(un) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - toplevel = !MD_HAS_PARENT(MD_PARENT(un)); - - /* we're related if trying to swap with our parent */ - related = (!toplevel) && (MD_PARENT(un) == to_min); - - switch (rtxnp->op) { - case MDRNOP_EXCHANGE: - /* - * check for a swap with our child - */ - for (smi = 0; smi < NMIRROR; smi++) { - - if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { - continue; - } - - sm = &un->un_sm[smi]; - if (md_getminor(sm->sm_dev) == to_min) { - related |= TRUE; - } - } - if (!related) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_TARGET_UNRELATED, to_min); - return (EINVAL); - } - - break; - - case MDRNOP_RENAME: - /* - * if from is top-level and is open, then the kernel is using - * the md_dev64_t. - */ - - if (toplevel && md_unit_isopen(ui)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - from_min); - return (EBUSY); - } - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - return (0); /* ok */ -} - -/* - * Named service entry point: MDRNM_CHECK - */ -intptr_t -mirror_rename_check( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int ci; - int i; - int compcnt; - mm_unit_t *un; - int err = 0; - - ASSERT(delta); - ASSERT(rtxnp); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - - if (!delta || !rtxnp || !delta->unp || !delta->uip) { - (void) mdsyserror(&rtxnp->mde, EINVAL); - return (EINVAL); - } - - un = (mm_unit_t *)delta->unp; - - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - - if (!SMS_IS(sm, SMS_INUSE)) - continue; - - ASSERT(smic->sm_get_component_count); - if (!smic->sm_get_component_count) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - md_getminor(delta->dev)); - return (ENXIO); - } - - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un); - - for (ci = 0; ci < compcnt; ci++) { - - ASSERT(smic->sm_shared_by_indx); - if (!smic->sm_shared_by_indx) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, - md_getminor(delta->dev)); - return (ENXIO); - } - - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - - ASSERT(shared); - if (!shared) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_CONFIG_ERROR, - md_getminor(delta->dev)); - return (ENXIO); - } - - if (shared->ms_hs_id != 0) { - (void) mdmderror(&rtxnp->mde, - MDE_SM_FAILED_COMPS, - md_getminor(delta->dev)); - return (EIO); - } - - switch (shared->ms_state) { - case CS_OKAY: - break; - - case CS_RESYNC: - (void) mdmderror(&rtxnp->mde, - MDE_RESYNC_ACTIVE, - md_getminor(delta->dev)); - return (EBUSY); - - default: - (void) mdmderror(&rtxnp->mde, - MDE_SM_FAILED_COMPS, - md_getminor(delta->dev)); - return (EINVAL); - } - - } - } - - /* self does additional checks */ - if (delta->old_role == MDRR_SELF) { - err = mirror_may_renexch_self(un, delta->uip, rtxnp); - } - - return (err); -} - -/* end of rename/exchange */ diff --git a/usr/src/uts/common/io/lvm/mirror/mirror_resync.c b/usr/src/uts/common/io/lvm/mirror/mirror_resync.c deleted file mode 100644 index 8630593ec964..000000000000 --- a/usr/src/uts/common/io/lvm/mirror/mirror_resync.c +++ /dev/null @@ -1,3887 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -extern int md_status; -extern kmutex_t md_status_mx; -extern kmutex_t md_mx; - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; -extern major_t md_major; - -extern md_ops_t mirror_md_ops; -extern kmem_cache_t *mirror_child_cache; /* mirror child memory pool */ -extern mdq_anchor_t md_mto_daemon; -extern daemon_request_t mirror_timeout; -extern md_resync_t md_cpr_resync; -extern clock_t md_hz; -extern int md_mtioctl_cnt; - -extern kmem_cache_t *mirror_parent_cache; -#ifdef DEBUG -extern int mirror_debug_flag; -#endif - -/* - * Tunable resync thread timeout. This is used as the time interval for updating - * the resync progress to the mddb. This allows restartable resyncs to be - * continued across a system reboot. - * Default is to update the resync progress every 5 minutes. - */ -int md_mirror_resync_update_intvl = MD_DEF_MIRROR_RESYNC_INTVL; - -/* - * Settable mirror resync buffer size. Specified in 512 byte - * blocks. This is set to MD_DEF_RESYNC_BUF_SIZE by default. - */ -int md_resync_bufsz = MD_DEF_RESYNC_BUF_SIZE; - -/* - * Tunables for dirty region processing when - * closing down a mirror. - * - * Dirty region processing during close of a - * mirror is basically monitoring the state - * of the resync region bitmaps and the number - * of outstanding i/o's per submirror to - * determine that there are no more dirty - * regions left over. - * - * The approach taken is a retry logic over - * md_mirror_rr_cleans iterations to monitor - * the progress. - * - * There are two methods of polling the progress - * on dirty bitmap processing: busy-waits and - * non-busy-waits. - * - * Busy-waits are used at the beginning to - * determine the final state as quick as - * possible; md_mirror_rr_polls defines the - * number of busy-waits. - * - * In case the number of busy-waits got exhausted - * with dirty regions left over, the retry logic - * switches over to non-busy-waits, thus giving - * relief to an obviously heavily loaded system. - * The timeout value is defined by the tunable - * md_mirror_rr_sleep_timo in seconds. - * - * The number of non-busy-waits is given by: - * md_mirror_rr_cleans - md_mirror_rr_polls. - * - * The values were found by testing on a - * 'typical' system and may require tuning - * to meet specific customer's requirements. - */ - -int md_mirror_rr_cleans = 13; -int md_mirror_rr_polls = 3; -int md_mirror_rr_sleep_timo = 1; - -/* - * The value is not #defined because it will be computed - * in the future. - */ -int md_max_xfer_bufsz = 2048; - -/* - * mirror_generate_rr_bitmap: - * ------------------- - * Generate a compressed bitmap md_mn_msg_rr_clean_t for the given clean - * bitmap associated with mirror 'un' - * - * Input: - * un - mirror unit to get bitmap data from - * *msgp - location to return newly allocated md_mn_msg_rr_clean_t - * *activep- location to return # of active i/os - * - * Returns: - * 1 => dirty bits cleared from un_dirty_bm and DRL flush required - * *msgp contains bitmap of to-be-cleared bits - * 0 => no bits cleared - * *msgp == NULL - */ -static int -mirror_generate_rr_bitmap(mm_unit_t *un, md_mn_msg_rr_clean_t **msgp, - int *activep) -{ - unsigned int i, next_bit, data_bytes, start_bit; - int cleared_dirty = 0; - - /* Skip any initial 0s. */ -retry_dirty_scan: - if ((start_bit = un->un_rr_clean_start_bit) >= un->un_rrd_num) - un->un_rr_clean_start_bit = start_bit = 0; - - /* - * Handle case where NO bits are set in PERNODE_DIRTY but the - * un_dirty_bm[] map does have entries set (after a 1st resync) - */ - for (; start_bit < un->un_rrd_num && - !IS_PERNODE_DIRTY(md_mn_mynode_id, start_bit, un) && - (un->un_pernode_dirty_sum[start_bit] != (uchar_t)0); start_bit++) - ; - - if (start_bit >= un->un_rrd_num) { - if (un->un_rr_clean_start_bit == 0) { - return (0); - } else { - un->un_rr_clean_start_bit = 0; - goto retry_dirty_scan; - } - } - - /* how much to fit into this message */ - data_bytes = MIN(howmany(un->un_rrd_num - start_bit, NBBY), - MDMN_MSG_RR_CLEAN_DATA_MAX_BYTES); - - (*msgp) = kmem_zalloc(MDMN_MSG_RR_CLEAN_SIZE_DATA(data_bytes), - KM_SLEEP); - - (*msgp)->rr_nodeid = md_mn_mynode_id; - (*msgp)->rr_mnum = MD_SID(un); - MDMN_MSG_RR_CLEAN_START_SIZE_SET(*msgp, start_bit, data_bytes); - - next_bit = MIN(start_bit + data_bytes * NBBY, un->un_rrd_num); - - for (i = start_bit; i < next_bit; i++) { - if (un->c.un_status & MD_UN_KEEP_DIRTY && IS_KEEPDIRTY(i, un)) { - continue; - } - if (!IS_REGION_DIRTY(i, un)) { - continue; - } - if (un->un_outstanding_writes[i] != 0) { - (*activep)++; - continue; - } - - /* - * Handle the case where a resync has completed and we still - * have the un_dirty_bm[] entries marked as dirty (these are - * the most recent DRL re-read from the replica). They need - * to be cleared from our un_dirty_bm[] but they will not have - * corresponding un_pernode_dirty[] entries set unless (and - * until) further write()s have been issued to the area. - * This handles the case where only the un_dirty_bm[] entry is - * set. Without this we'd not clear this region until a local - * write is issued to the affected area. - */ - if (IS_PERNODE_DIRTY(md_mn_mynode_id, i, un) || - (un->un_pernode_dirty_sum[i] == (uchar_t)0)) { - if (!IS_GOING_CLEAN(i, un)) { - SET_GOING_CLEAN(i, un); - (*activep)++; - continue; - } - /* - * Now we've got a flagged pernode_dirty, _or_ a clean - * bitmap entry to process. Update the bitmap to flush - * the REGION_DIRTY / GOING_CLEAN bits when we send the - * cross-cluster message. - */ - cleared_dirty++; - setbit(MDMN_MSG_RR_CLEAN_DATA(*msgp), i - start_bit); - } else { - /* - * Not marked as active in the pernode bitmap, so skip - * any update to this. We just increment the 0 count - * and adjust the active count by any outstanding - * un_pernode_dirty_sum[] entries. This means we don't - * leave the mirror permanently dirty. - */ - (*activep) += (int)un->un_pernode_dirty_sum[i]; - } - } - if (!cleared_dirty) { - kmem_free(*msgp, MDMN_MSG_RR_CLEAN_SIZE_DATA(data_bytes)); - *msgp = NULL; - } - un->un_rr_clean_start_bit = next_bit; - return (cleared_dirty); -} - -/* - * There are three paths into here: - * - * md_daemon -> check_resync_regions -> prr - * mirror_internal_close -> mirror_process_unit_resync -> prr - * mirror_set_capability -> mirror_process_unit_resync -> prr - * - * The first one is a kernel daemon, the other two result from system calls. - * Thus, only the first case needs to deal with kernel CPR activity. This - * is indicated by the cprinfop being non-NULL for kernel daemon calls, and - * NULL for system call paths. - */ -static int -process_resync_regions_non_owner(mm_unit_t *un, callb_cpr_t *cprinfop) -{ - int i, start, end; - int cleared_dirty = 0; - /* Number of reasons why we can not proceed shutting down the mirror. */ - int active = 0; - set_t setno = MD_UN2SET(un); - md_mn_msg_rr_clean_t *rmsg; - md_mn_kresult_t *kres; - int rval; - minor_t mnum = MD_SID(un); - mdi_unit_t *ui = MDI_UNIT(mnum); - md_mn_nodeid_t owner_node; - - /* - * We drop the readerlock here to assist lock ordering with - * update_resync. Once we have the un_rrp_inflight_mx, we - * can re-acquire it. - */ - md_unit_readerexit(ui); - - /* - * Resync region processing must be single threaded. We can't use - * un_resync_mx for this purpose since this mutex gets released - * when blocking on un_resync_cv. - */ - mutex_enter(&un->un_rrp_inflight_mx); - - (void) md_unit_readerlock(ui); - - mutex_enter(&un->un_resync_mx); - - rw_enter(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1], RW_READER); - cleared_dirty = mirror_generate_rr_bitmap(un, &rmsg, &active); - rw_exit(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1]); - - if (cleared_dirty) { - owner_node = un->un_mirror_owner; - mutex_exit(&un->un_resync_mx); - - /* - * Transmit the 'to-be-cleared' bitmap to all cluster nodes. - * Receipt of the message will cause the mirror owner to - * update the on-disk DRL. - */ - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - - /* release readerlock before sending message */ - md_unit_readerexit(ui); - - if (cprinfop) { - mutex_enter(&un->un_prr_cpr_mx); - CALLB_CPR_SAFE_BEGIN(cprinfop); - } - - rval = mdmn_ksend_message(setno, MD_MN_MSG_RR_CLEAN, - MD_MSGF_NO_LOG|MD_MSGF_BLK_SIGNAL|MD_MSGF_KSEND_NORETRY| - MD_MSGF_DIRECTED, un->un_mirror_owner, - (char *)rmsg, MDMN_MSG_RR_CLEAN_MSG_SIZE(rmsg), kres); - - if (cprinfop) { - CALLB_CPR_SAFE_END(cprinfop, &un->un_prr_cpr_mx); - mutex_exit(&un->un_prr_cpr_mx); - } - - /* reacquire readerlock after message */ - (void) md_unit_readerlock(ui); - - if ((!MDMN_KSEND_MSG_OK(rval, kres)) && - (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) { - /* if commd is gone, no point in printing a message */ - if (md_mn_is_commd_present()) - mdmn_ksend_show_error(rval, kres, "RR_CLEAN"); - kmem_free(kres, sizeof (md_mn_kresult_t)); - kmem_free(rmsg, MDMN_MSG_RR_CLEAN_MSG_SIZE(rmsg)); - mutex_exit(&un->un_rrp_inflight_mx); - return (active); - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - - /* - * If ownership changed while we were sending, we probably - * sent the message to the wrong node. Leave fixing that for - * the next cycle. - */ - if (un->un_mirror_owner != owner_node) { - mutex_exit(&un->un_rrp_inflight_mx); - return (active); - } - - /* - * Now that we've sent the message, clear them from the - * pernode_dirty arrays. These are ONLY cleared on a - * successful send, and failure has no impact. - */ - cleared_dirty = 0; - start = MDMN_MSG_RR_CLEAN_START_BIT(rmsg); - end = start + MDMN_MSG_RR_CLEAN_DATA_BYTES(rmsg) * NBBY; - mutex_enter(&un->un_resync_mx); - rw_enter(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1], - RW_READER); - for (i = start; i < end; i++) { - if (isset(MDMN_MSG_RR_CLEAN_DATA(rmsg), - i - start)) { - if (IS_PERNODE_DIRTY(md_mn_mynode_id, i, un)) { - un->un_pernode_dirty_sum[i]--; - CLR_PERNODE_DIRTY(md_mn_mynode_id, i, - un); - } - if (IS_REGION_DIRTY(i, un)) { - cleared_dirty++; - CLR_REGION_DIRTY(i, un); - CLR_GOING_CLEAN(i, un); - } - } - } - rw_exit(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1]); - - kmem_free(rmsg, MDMN_MSG_RR_CLEAN_MSG_SIZE(rmsg)); - } - mutex_exit(&un->un_resync_mx); - - mutex_exit(&un->un_rrp_inflight_mx); - - return (active); -} - -static int -process_resync_regions_owner(mm_unit_t *un) -{ - int i, start, end; - int cleared_dirty = 0; - /* Number of reasons why we can not proceed shutting down the mirror. */ - int active = 0; - set_t setno = MD_UN2SET(un); - int mnset = MD_MNSET_SETNO(setno); - md_mn_msg_rr_clean_t *rmsg; - minor_t mnum = MD_SID(un); - mdi_unit_t *ui = MDI_UNIT(mnum); - - /* - * We drop the readerlock here to assist lock ordering with - * update_resync. Once we have the un_rrp_inflight_mx, we - * can re-acquire it. - */ - md_unit_readerexit(ui); - - /* - * Resync region processing must be single threaded. We can't use - * un_resync_mx for this purpose since this mutex gets released - * when blocking on un_resync_cv. - */ - mutex_enter(&un->un_rrp_inflight_mx); - - (void) md_unit_readerlock(ui); - - mutex_enter(&un->un_resync_mx); - un->un_waiting_to_clear++; - while (un->un_resync_flg & MM_RF_STALL_CLEAN) - cv_wait(&un->un_resync_cv, &un->un_resync_mx); - un->un_waiting_to_clear--; - - if (mnset) { - rw_enter(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1], - RW_READER); - cleared_dirty = mirror_generate_rr_bitmap(un, &rmsg, &active); - - if (cleared_dirty) { - /* - * Clear the bits from the pernode_dirty arrays. - * If that results in any being cleared from the - * un_dirty_bm, commit it. - */ - cleared_dirty = 0; - start = MDMN_MSG_RR_CLEAN_START_BIT(rmsg); - end = start + MDMN_MSG_RR_CLEAN_DATA_BYTES(rmsg) * NBBY; - for (i = start; i < end; i++) { - if (isset(MDMN_MSG_RR_CLEAN_DATA(rmsg), - i - start)) { - if (IS_PERNODE_DIRTY(md_mn_mynode_id, i, - un)) { - un->un_pernode_dirty_sum[i]--; - CLR_PERNODE_DIRTY( - md_mn_mynode_id, i, un); - } - if (un->un_pernode_dirty_sum[i] == 0) { - cleared_dirty++; - CLR_REGION_DIRTY(i, un); - CLR_GOING_CLEAN(i, un); - } - } - } - kmem_free(rmsg, MDMN_MSG_RR_CLEAN_MSG_SIZE(rmsg)); - } - rw_exit(&un->un_pernode_dirty_mx[md_mn_mynode_id - 1]); - } else { - for (i = 0; i < un->un_rrd_num; i++) { - if (un->c.un_status & MD_UN_KEEP_DIRTY) - if (IS_KEEPDIRTY(i, un)) - continue; - - if (!IS_REGION_DIRTY(i, un)) - continue; - if (un->un_outstanding_writes[i] != 0) { - active++; - continue; - } - - if (!IS_GOING_CLEAN(i, un)) { - SET_GOING_CLEAN(i, un); - active++; - continue; - } - CLR_REGION_DIRTY(i, un); - CLR_GOING_CLEAN(i, un); - cleared_dirty++; - } - } - - if (cleared_dirty) { - un->un_resync_flg |= MM_RF_GATECLOSED; - mutex_exit(&un->un_resync_mx); - mddb_commitrec_wrapper(un->un_rr_dirty_recid); - mutex_enter(&un->un_resync_mx); - un->un_resync_flg &= ~MM_RF_GATECLOSED; - - if (un->un_waiting_to_mark != 0 || - un->un_waiting_to_clear != 0) { - active++; - cv_broadcast(&un->un_resync_cv); - } - } - mutex_exit(&un->un_resync_mx); - - mutex_exit(&un->un_rrp_inflight_mx); - - return (active); -} - -static int -process_resync_regions(mm_unit_t *un, callb_cpr_t *cprinfop) -{ - int mnset = MD_MNSET_SETNO(MD_UN2SET(un)); - /* - * For a mirror we can only update the on-disk resync-record if we - * currently own the mirror. If we are called and there is no owner we - * bail out before scanning the outstanding_writes[] array. - * NOTE: we only need to check here (before scanning the array) as we - * are called with the readerlock held. This means that a change - * of ownership away from us will block until this resync check - * has completed. - */ - if (mnset && (MD_MN_NO_MIRROR_OWNER(un) || - (!MD_MN_MIRROR_OWNER(un) && !md_mn_is_commd_present_lite()))) { - return (0); - } else if (mnset && !MD_MN_MIRROR_OWNER(un)) { - return (process_resync_regions_non_owner(un, cprinfop)); - } else { - return (process_resync_regions_owner(un)); - } -} - -/* - * Function that is callable from other modules to provide - * ability to cleanup dirty region bitmap on demand. Used - * on last close of a unit to avoid massive device resyncs - * when coming back after rolling large amounts of data to - * a mirror (e.g. at umount with logging). - */ - -void -mirror_process_unit_resync(mm_unit_t *un) -{ - int cleans = 0; - - while (process_resync_regions(un, NULL)) { - - cleans++; - if (cleans >= md_mirror_rr_cleans) { - cmn_err(CE_NOTE, - "Could not clean resync regions\n"); - break; - } - if (cleans > md_mirror_rr_polls) { - /* - * We did not make it with md_mirror_rr_polls - * iterations. Give the system relief and - * switch over to non-busy-wait. - */ - delay(md_mirror_rr_sleep_timo * md_hz); - } - } -} - -static void -check_resync_regions(daemon_request_t *timeout) -{ - mdi_unit_t *ui; - mm_unit_t *un; - md_link_t *next; - callb_cpr_t cprinfo; - - rw_enter(&mirror_md_ops.md_link_rw.lock, RW_READER); - for (next = mirror_md_ops.md_head; next != NULL; next = next->ln_next) { - - if (md_get_setstatus(next->ln_setno) & MD_SET_STALE) - continue; - - un = MD_UNIT(next->ln_id); - - /* - * Register this resync thread with the CPR mechanism. This - * allows us to detect when the system is suspended and so - * keep track of the RPC failure condition. - */ - CALLB_CPR_INIT(&cprinfo, &un->un_prr_cpr_mx, callb_md_mrs_cpr, - "check_resync_regions"); - - ui = MDI_UNIT(next->ln_id); - (void) md_unit_readerlock(ui); - - /* - * Do not clean up resync regions if it is an ABR - * mirror, or if a submirror is offline (we will use the resync - * region to resync when back online) or if there is only one - * submirror. - */ - if ((ui->ui_tstate & MD_ABR_CAP) || - (un->c.un_status & MD_UN_OFFLINE_SM) || (un->un_nsm < 2)) { - md_unit_readerexit(ui); - /* Remove this thread from the CPR callback table. */ - mutex_enter(&un->un_prr_cpr_mx); - CALLB_CPR_EXIT(&cprinfo); - continue; - } - - (void) process_resync_regions(un, &cprinfo); - - md_unit_readerexit(ui); - - /* Remove this thread from the CPR callback table. */ - mutex_enter(&un->un_prr_cpr_mx); - CALLB_CPR_EXIT(&cprinfo); - } - - rw_exit(&mirror_md_ops.md_link_rw.lock); - - /* We are done */ - mutex_enter(&mirror_timeout.dr_mx); - timeout->dr_pending = 0; - mutex_exit(&mirror_timeout.dr_mx); -} - -static void -md_mirror_timeout(void *throwaway) -{ - - mutex_enter(&mirror_timeout.dr_mx); - if (!mirror_timeout.dr_pending) { - mirror_timeout.dr_pending = 1; - daemon_request(&md_mto_daemon, check_resync_regions, - (daemon_queue_t *)&mirror_timeout, REQ_OLD); - } - - if (mirror_md_ops.md_head != NULL) - mirror_timeout.dr_timeout_id = timeout(md_mirror_timeout, - throwaway, (int)MD_MDELAY*hz); - else - mirror_timeout.dr_timeout_id = 0; - - mutex_exit(&mirror_timeout.dr_mx); -} - -void -resync_start_timeout(set_t setno) -{ - if (md_get_setstatus(setno) & MD_SET_STALE) - return; - - mutex_enter(&mirror_timeout.dr_mx); - if (mirror_timeout.dr_timeout_id == 0) - mirror_timeout.dr_timeout_id = timeout(md_mirror_timeout, - (void *)NULL, (int)MD_MDELAY*hz); - mutex_exit(&mirror_timeout.dr_mx); -} - -static void -offlined_to_attached(mm_unit_t *un) -{ - int i; - int changed = 0; - - if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) - return; - - for (i = 0; i < NMIRROR; i++) { - if (SMS_BY_INDEX_IS(un, i, SMS_OFFLINE)) { - mirror_set_sm_state(&un->un_sm[i], - &un->un_smic[i], SMS_ATTACHED, 1); - changed++; - } - if (SMS_BY_INDEX_IS(un, i, SMS_OFFLINE_RESYNC)) { - mirror_set_sm_state(&un->un_sm[i], - &un->un_smic[i], SMS_ATTACHED_RESYNC, 1); - changed++; - } - } - - if (changed != 0) { - un->c.un_status &= ~MD_UN_OFFLINE_SM; - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM); - } -} - -static void -get_unit_resync(mm_unit_t *un) -{ - mddb_recstatus_t status; - struct optim_resync *orp; - - if (un->un_rr_dirty_recid == 0) { - offlined_to_attached(un); - return; - } - - status = mddb_getrecstatus(un->un_rr_dirty_recid); - if ((status == MDDB_NORECORD) || (status == MDDB_NODATA)) { - un->un_rr_dirty_recid = 0; - offlined_to_attached(un); - return; - } - - mddb_setrecprivate(un->un_rr_dirty_recid, MD_PRV_GOTIT); - orp = (struct optim_resync *)mddb_getrecaddr(un->un_rr_dirty_recid); - un->un_dirty_bm = orp->or_rr; -} - -static int -create_unit_resync(mm_unit_t *un, int snarfing) -{ - diskaddr_t tb; - int i; - int blksize; /* rr size in blocks */ - int num_rr; - mddb_recid_t recid; - size_t size; /* bitmap size */ - optim_resync_t *orp; - mddb_type_t typ1; - set_t setno; - - tb = un->c.un_total_blocks; - - if (((tb + MD_MIN_RR_SIZE)/ MD_MIN_RR_SIZE) > MD_DEF_NUM_RR) { - blksize = (int)(tb / MD_DEF_NUM_RR); - num_rr = (int)((tb + (blksize)) / (blksize)); - } else { - blksize = MD_MIN_RR_SIZE; - num_rr = (int)((tb + MD_MIN_RR_SIZE) / MD_MIN_RR_SIZE); - } - - size = howmany(num_rr, NBBY) + sizeof (*orp) - sizeof (orp->or_rr); - - setno = MD_UN2SET(un); - - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - - recid = mddb_createrec(size, typ1, RESYNC_REC, - MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno); - if (recid < 0) { - if (snarfing && !(md_get_setstatus(setno) & MD_SET_STALE)) { - md_set_setstatus(setno, MD_SET_STALE); - cmn_err(CE_WARN, "md: state database is stale"); - } - return (-1); - } - - un->un_rr_dirty_recid = recid; - orp = (optim_resync_t *)mddb_getrecaddr(recid); - orp->or_magic = OR_MAGIC; - orp->or_blksize = blksize; - orp->or_num = num_rr; - - un->un_rrd_blksize = blksize; - un->un_rrd_num = num_rr; - un->un_dirty_bm = orp->or_rr; - - if (snarfing) - for (i = 0; i < howmany(num_rr, NBBY); i++) - orp->or_rr[i] = 0xFF; - - if (!snarfing) { - mddb_commitrec_wrapper(recid); - mirror_commit(un, NO_SUBMIRRORS, 0); - return (0); - } - mddb_setrecprivate(recid, MD_PRV_PENDCOM); - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM); - return (0); -} - -int -unit_setup_resync(mm_unit_t *un, int snarfing) -{ - int err; - int syncable; - int i; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - int nonABR = 1; /* only set if ABR marked in ui_tstate */ - - un->un_dirty_bm = NULL; - un->un_rs_buffer = NULL; - - mutex_init(&un->un_rrp_inflight_mx, "rrp mx", MUTEX_DEFAULT, NULL); - - mutex_init(&un->un_resync_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_resync_cv, NULL, CV_DEFAULT, NULL); - un->un_resync_flg = 0; - un->un_waiting_to_mark = 0; - un->un_waiting_to_commit = 0; - un->un_waiting_to_clear = 0; - - un->un_goingclean_bm = NULL; - un->un_goingdirty_bm = NULL; - un->un_outstanding_writes = NULL; - un->un_resync_bm = NULL; - - if (snarfing) - get_unit_resync(un); - - if (un->un_rr_dirty_recid == 0) { - /* - * If a MN diskset and snarfing and this node is not the - * master, do not delete any records on snarf of the - * mirror records (create_unit_resync deletes records). - * - * Master node should have already handled this case. - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && snarfing && - md_set[MD_UN2SET(un)].s_am_i_master == 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "unit_setup_resync: no rr for %s on" - " nodeid %d\n", md_shortname(MD_SID(un)), - md_set[MD_UN2SET(un)].s_nodeid); -#endif - return (-1); - } - if ((err = create_unit_resync(un, snarfing)) != 0) - return (err); - } - - un->un_goingclean_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany( - un->un_rrd_num, NBBY)), KM_SLEEP); - un->un_goingdirty_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany( - un->un_rrd_num, NBBY)), KM_SLEEP); - un->un_outstanding_writes = (short *)kmem_zalloc( - (uint_t)un->un_rrd_num * sizeof (short), KM_SLEEP); - un->un_resync_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany( - un->un_rrd_num, NBBY)), KM_SLEEP); - - /* - * Allocate pernode bitmap for this node. All other nodes' maps will - * be created 'on-the-fly' in the ioctl message handler - */ - if (MD_MNSET_SETNO(MD_UN2SET(un))) { - un->un_pernode_dirty_sum = - (uchar_t *)kmem_zalloc(un->un_rrd_num, KM_SLEEP); - if (md_mn_mynode_id > 0) { - un->un_pernode_dirty_bm[md_mn_mynode_id-1] = (uchar_t *) - kmem_zalloc((uint_t)(howmany(un->un_rrd_num, NBBY)), - KM_SLEEP); - } - - /* - * Allocate taskq to process deferred (due to locking) RR_CLEAN - * requests. - */ - un->un_drl_task = (ddi_taskq_t *)md_create_taskq(MD_UN2SET(un), - MD_SID(un)); - } - - if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) - return (0); - - /* - * Only mark mirror which has an associated DRL as requiring a resync. - * For ABR mirrors we need not set the resync record bitmap up. - */ - if (ui && (ui->ui_tstate & MD_ABR_CAP)) - nonABR = 0; - - for (i = 0, syncable = 0; i < NMIRROR; i++) { - if (nonABR) { - if ((SUBMIRROR_IS_READABLE(un, i) || - SMS_BY_INDEX_IS(un, i, - (SMS_OFFLINE | SMS_OFFLINE_RESYNC)))) - syncable++; - } - } - - if (snarfing && un->un_pass_num && (syncable > 1)) { - bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm, - howmany(un->un_rrd_num, NBBY)); - - un->c.un_status |= (MD_UN_OPT_NOT_DONE | MD_UN_WAR); - un->c.un_status &= ~MD_UN_OFFLINE_SM; - for (i = 0; i < NMIRROR; i++) { - if ((SUBMIRROR_IS_READABLE(un, i)) || - SMS_BY_INDEX_IS(un, i, SMS_OFFLINE_RESYNC)) - un->un_sm[i].sm_flags |= MD_SM_RESYNC_TARGET; - - if (SMS_BY_INDEX_IS(un, i, SMS_OFFLINE)) { - un->un_sm[i].sm_flags |= MD_SM_RESYNC_TARGET; - mirror_set_sm_state(&un->un_sm[i], - &un->un_smic[i], SMS_OFFLINE_RESYNC, 1); - mddb_setrecprivate(un->c.un_record_id, - MD_PRV_PENDCOM); - } - } - } - return (0); -} - -/* - * resync_kill_pending: - * ------------------- - * Determine if the resync thread has been requested to terminate. - * Block if MD_RI_BLOCK or MD_RI_BLOCK_OWNER is set in un->un_rs_thread_flags. - * MD_RI_BLOCK is only set as a result of a user-initiated ioctl via metasync. - * MD_RI_BLOCK_OWNER is set by the ownership change of a multi-node mirror. - * - * Returns: - * 0 Kill not pending - * 1 Kill requested (set MD_UN_RESYNC_CANCEL in un->c.un_status) - * - * Note: this routine may block - * the writerlock for will be dropped and reacquired if - * is set to MD_WRITER_HELD. - * the readerlock for will be dropped and reacquired if - * is set to MD_READER_HELD. - */ -static int -resync_kill_pending( - mm_unit_t *un, - mdi_unit_t *ui, - uint_t mx_type) -{ - int retval = 0; - - /* Ensure that we don't block with any mutex held */ - if (mx_type == MD_WRITER_HELD) { - md_unit_writerexit(ui); - } else if (mx_type == MD_READER_HELD) { - md_unit_readerexit(ui); - } - mutex_enter(&un->un_rs_thread_mx); - while (un->un_rs_thread_flags & (MD_RI_BLOCK|MD_RI_BLOCK_OWNER)) { - cv_wait(&un->un_rs_thread_cv, &un->un_rs_thread_mx); - if (un->un_rs_thread_flags & (MD_RI_KILL|MD_RI_SHUTDOWN)) - break; - } - /* Determine if we've been asked to abort or shutdown gracefully */ - if (un->un_rs_thread_flags & MD_RI_KILL) { - un->c.un_status |= MD_UN_RESYNC_CANCEL; - retval = 1; - } else if (un->un_rs_thread_flags & MD_RI_SHUTDOWN) { - retval = 1; - } - mutex_exit(&un->un_rs_thread_mx); - - /* Reacquire mutex if dropped on entry */ - if (mx_type == MD_WRITER_HELD) { - (void) md_unit_writerlock(ui); - } else if (mx_type == MD_READER_HELD) { - (void) md_unit_readerlock(ui); - } - return (retval); -} - -/* - * resync_read_buffer: - * ------------------ - * Issue the resync source read for the specified start block and size. - * This will cause the mirror strategy routine to issue a write-after-read - * once this request completes successfully. - * If 'flag_err' is set we expect to see a write error flagged in the b_error - * field of the buffer created for this i/o request. If clear we do not expect - * to see the error flagged for write failures. - * Read failures will always set the B_ERROR bit which will stop the resync - * immediately. - */ -static int -resync_read_buffer(mm_unit_t *un, diskaddr_t blk, size_t cnt, int flag_err) -{ - md_mcs_t *sp; - buf_t *bp; - int ret = 0; - - sp = kmem_cache_alloc(mirror_child_cache, MD_ALLOCFLAGS); - mirror_child_init(sp); - - bp = &sp->cs_buf; - bp->b_edev = makedevice(md_major, MD_SID(un)); - bp->b_flags = B_READ; - bp->b_lblkno = blk; - bp->b_bcount = dbtob(cnt); - bp->b_un.b_addr = un->un_rs_buffer; - md_unit_readerexit(MDI_UNIT(MD_SID(un))); - - (void) md_mirror_strategy(bp, MD_STR_NOTTOP | MD_STR_MAPPED | - MD_STR_WAR | (flag_err ? MD_STR_FLAG_ERR : 0), NULL); - - (void) biowait(bp); - - (void) md_unit_readerlock(MDI_UNIT(MD_SID(un))); - if (bp->b_flags & B_ERROR) { - ret = 1; - } - kmem_cache_free(mirror_child_cache, sp); - return (ret); -} - -/* - * send_mn_resync_done_message - * - * At the end of a resync, send a message to all nodes to indicate that - * the resync is complete. The argument, flags, has the following values - * - * RESYNC_ERR - if an error occurred that terminated the resync - * CLEAR_OPT_NOT_DONE - Just need to clear the OPT_NOT_DONE flag - * - * unit writerlock set on entry - * Only send the message if the thread is not marked as shutting down: - * [un_rs_thread_flags & MD_RI_SHUTDOWN] or being killed: - * [un->c.un_status & MD_UN_RESYNC_CANCEL] - * or if there has been an error that terminated the resync: - * flags & RESYNC_ERR - * - */ -static void -send_mn_resync_done_message( - mm_unit_t *un, - int flags -) -{ - md_mn_msg_resync_t *rmsg = un->un_rs_msg; - set_t setno; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - md_mn_kresult_t *kres; - int dont_send = 0; - int rval; - int nretries = 0; - - rmsg = (md_mn_msg_resync_t *)un->un_rs_msg; - - /* - * Only send the message if this resync thread is still active. This - * handles the case where ownership changes to different nodes during - * a resync can cause multiple spurious resync_done messages to occur - * when the resync completes. This happens because only one node is - * the resync owner but other nodes will have their resync_unit thread - * blocked in 'resync_kill_pending' - */ - mutex_enter(&un->un_rs_thread_mx); - dont_send = (un->un_rs_thread_flags & (MD_RI_KILL|MD_RI_SHUTDOWN)) ? 1 - : 0; - mutex_exit(&un->un_rs_thread_mx); - dont_send |= (un->c.un_status & MD_UN_RESYNC_CANCEL) ? 1 : 0; - - /* - * Always send a message if we've encountered an error that terminated - * the resync. - */ - if (flags & RESYNC_ERR) - dont_send = 0; - - if (dont_send) { -#ifdef DEBUG - if (mirror_debug_flag) { - printf("Don't send resync done message, mnum = %x," - " type = %x, flags = %d\n", MD_SID(un), - un->un_rs_type, flags); - } -#endif /* DEBUG */ - return; - } - -#ifdef DEBUG - if (mirror_debug_flag) { - printf("send resync done message, mnum = %x, type = %x\n", - MD_SID(un), un->un_rs_type); - } -#endif - - rmsg->msg_resync_mnum = MD_SID(un); - rmsg->msg_resync_type = un->un_rs_type; - rmsg->msg_originator = md_mn_mynode_id; - rmsg->msg_resync_flags = 0; - if (flags & RESYNC_ERR) - rmsg->msg_resync_flags |= MD_MN_RS_ERR; - if (flags & CLEAR_OPT_NOT_DONE) - rmsg->msg_resync_flags |= MD_MN_RS_CLEAR_OPT_NOT_DONE; - - setno = MD_MIN2SET(MD_SID(un)); - md_unit_writerexit(ui); - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -smrd_msg: - mutex_enter(&un->un_rs_cpr_mx); - CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo); - - rval = mdmn_ksend_message(setno, MD_MN_MSG_RESYNC_PHASE_DONE, - MD_MSGF_NO_LOG, 0, (char *)rmsg, sizeof (md_mn_msg_resync_t), kres); - - CALLB_CPR_SAFE_END(&un->un_rs_cprinfo, &un->un_rs_cpr_mx); - mutex_exit(&un->un_rs_cpr_mx); - - /* if the node hasn't yet joined, it's Ok. */ - if ((!MDMN_KSEND_MSG_OK(rval, kres)) && - (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) { - mdmn_ksend_show_error(rval, kres, "RESYNC_PHASE_DONE"); - /* If we're shutting down already, pause things here. */ - if (kres->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd is now available again. Retry the message once. - * If this fails we panic as the system is in an - * unexpected state. - */ - if (nretries++ == 0) - goto smrd_msg; - } - cmn_err(CE_PANIC, "ksend_message failure: RESYNC_PHASE_DONE"); - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - (void) md_unit_writerlock(ui); -} - -/* - * send_mn_resync_next_message - * - * Sent a message to all nodes indicating the next region to be resynced. - * The message contains the region to be resynced and the current position in - * the resync as denoted by un_rs_resync_done and un_rs_resync_2_do. - * On entry the unit readerlock is held. - */ -static void -send_mn_resync_next_message( - mm_unit_t *un, - diskaddr_t currentblk, - size_t rsize, - int flags -) -{ - md_mn_msg_resync_t *rmsg = un->un_rs_msg; - set_t setno; - md_mn_kresult_t *kres; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - int rval; - md_mps_t *ps; - mm_submirror_t *sm; - int smi; - int nretries = 0; - - ASSERT(rmsg != NULL); -#ifdef DEBUG - if (mirror_debug_flag) { - printf("send resync next message, mnum = %x, start=%lld, " - "size=%ld, type=%x, done=%lld, 2_do=%lld\n", - MD_SID(un), currentblk, rsize, un->un_rs_type, - un->un_rs_resync_done, un->un_rs_resync_2_do); - } -#endif - rmsg->msg_resync_mnum = MD_SID(un); - rmsg->msg_resync_type = un->un_rs_type; - rmsg->msg_resync_start = currentblk; - rmsg->msg_resync_rsize = rsize; - rmsg->msg_resync_done = un->un_rs_resync_done; - rmsg->msg_resync_2_do = un->un_rs_resync_2_do; - rmsg->msg_originator = md_mn_mynode_id; - if (flags & MD_FIRST_RESYNC_NEXT) - rmsg->msg_resync_flags = MD_MN_RS_FIRST_RESYNC_NEXT; - - /* - * Copy current submirror state and flags into message. This provides - * a means of keeping all nodes that are currently active in the cluster - * synchronised with regards to their submirror state settings. If we - * did not pass this information here, the only time every node gets - * submirror state updated is at the end of a resync phase. This can be - * a significant amount of time for large metadevices. - */ - for (smi = 0; smi < NMIRROR; smi++) { - sm = &un->un_sm[smi]; - rmsg->msg_sm_state[smi] = sm->sm_state; - rmsg->msg_sm_flags[smi] = sm->sm_flags; - } - setno = MD_MIN2SET(MD_SID(un)); - md_unit_readerexit(ui); - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -smrn_msg: - mutex_enter(&un->un_rs_cpr_mx); - CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo); - - rval = mdmn_ksend_message(setno, MD_MN_MSG_RESYNC_NEXT, MD_MSGF_NO_LOG, - 0, (char *)rmsg, sizeof (md_mn_msg_resync_t), kres); - - CALLB_CPR_SAFE_END(&un->un_rs_cprinfo, &un->un_rs_cpr_mx); - mutex_exit(&un->un_rs_cpr_mx); - - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - mdmn_ksend_show_error(rval, kres, "RESYNC_NEXT"); - /* If we're shutting down already, pause things here. */ - if (kres->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd is now available again. Retry the message once. - * If this fails we panic as the system is in an - * unexpected state. - */ - if (nretries++ == 0) - goto smrn_msg; - } - cmn_err(CE_PANIC, "ksend_message failure: RESYNC_NEXT"); - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - (void) md_unit_readerlock(ui); - ps = un->un_rs_prev_overlap; - - /* Allocate previous overlap reference if needed */ - if (ps == NULL) { - ps = kmem_cache_alloc(mirror_parent_cache, MD_ALLOCFLAGS); - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_firstblk = 0; - ps->ps_lastblk = 0; - ps->ps_flags = 0; - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - un->un_rs_prev_overlap = ps; - md_unit_writerexit(ui); - (void) md_unit_readerlock(ui); - } - - ps->ps_firstblk = currentblk; - ps->ps_lastblk = currentblk + rsize - 1; -} - -static int -resync_read_blk_range( - mm_unit_t *un, - diskaddr_t currentblk, - diskaddr_t stopbefore, - uint_t type, - int flags -) -{ - size_t copysize; /* limited by max xfer buf size */ - size_t rsize; /* size of resync block (for MN) */ - set_t setno; - diskaddr_t newstop; - diskaddr_t rs_startblk; - uint_t rs_type; - int flags1 = flags & MD_FIRST_RESYNC_NEXT; - - rs_type = un->un_rs_type; - rs_startblk = currentblk; - if (stopbefore > un->c.un_total_blocks) - stopbefore = un->c.un_total_blocks; - if (currentblk < un->un_resync_startbl) - currentblk = un->un_resync_startbl; - - copysize = un->un_rs_copysize; - rsize = MD_DEF_RESYNC_BLK_SZ; - - setno = MD_MIN2SET(MD_SID(un)); - while (currentblk < stopbefore) { - /* - * Split the block up into units of MD_DEF_RESYNC_BLK_SZ and - * if a MN device and sendflag is set, send a RESYNC_MESSAGE - * to all nodes. - */ - if ((currentblk + MD_DEF_RESYNC_BLK_SZ) > stopbefore) - rsize = stopbefore - currentblk; - if (MD_MNSET_SETNO(setno) && (flags & MD_SEND_MESS_XMIT)) { - un->un_resync_startbl = currentblk; - rs_startblk = currentblk; - send_mn_resync_next_message(un, currentblk, rsize, - flags1); - if (flags1) - flags1 = 0; - /* check to see if we've been asked to terminate */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), type)) - return ((un->c.un_status & MD_UN_RESYNC_CANCEL) - ? 1:0); - /* - * Check to see if another node has completed this - * block, if so either the type or the resync region - * will have changed. If the resync type has changed, - * just exit. - * If the resync region has changed, reset currentblk - * to the start of the current resync region and - * continue. - */ - if (un->un_rs_type != rs_type) - return (0); - if (un->un_rs_prev_overlap->ps_firstblk > - rs_startblk) { - currentblk = - un->un_rs_prev_overlap->ps_firstblk; - continue; - } - } - newstop = currentblk + rsize; - while (currentblk < newstop) { - if ((currentblk + copysize) > stopbefore) - copysize = (size_t)(stopbefore - currentblk); - if (resync_read_buffer(un, currentblk, copysize, - (flags & MD_RESYNC_FLAG_ERR))) - return (1); - - /* resync_read_buffer releases/grabs a new lock */ - un = (mm_unit_t *)MD_UNIT(MD_SID(un)); - currentblk += copysize; - - /* check to see if we've been asked to terminate */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), type)) - return ((un->c.un_status & MD_UN_RESYNC_CANCEL) - ? 1:0); - if (MD_MNSET_SETNO(setno)) { - /* - * Check to see if another node has completed - * this block, see above - */ - if (un->un_rs_type != rs_type) - return (0); - if (un->un_rs_prev_overlap->ps_firstblk > - rs_startblk) - currentblk = - un->un_rs_prev_overlap->ps_firstblk; - } - } - } - return (0); -} - -static void -optimized_resync(mm_unit_t *un) -{ - mdi_unit_t *ui; - minor_t mnum; - int rr, smi; - int resync_regions; - uchar_t *dirtyregions; - diskaddr_t first, stopbefore; - int err; - int cnt; - sm_state_t state; - int broke_out = 0; - set_t setno; - uint_t old_rs_type = un->un_rs_type; - uint_t old_rs_done; - uint_t flags1 = MD_FIRST_RESYNC_NEXT|MD_RESYNC_FLAG_ERR; - size_t start_rr; - - mnum = MD_SID(un); - ui = MDI_UNIT(mnum); - setno = MD_UN2SET(un); - - if (!(un->c.un_status & MD_UN_OPT_NOT_DONE)) { - /* - * We aren't marked as needing a resync so for multi-node - * sets we flag the completion so that all nodes see the same - * metadevice state. This is a problem when a new node joins - * an existing set as it has to perform a 'metasync -r' and - * we have to step through all of the resync phases. If we - * don't do this the nodes that were already in the set will - * have the metadevices marked as 'Okay' but the joining node - * will have 'Needs Maintenance' which is unclearable. - */ - if (MD_MNSET_SETNO(setno)) { - send_mn_resync_done_message(un, CLEAR_OPT_NOT_DONE); - } - return; - } - - /* - * No need for optimized resync if ABR set, clear rs_type and flags - * and exit - */ - if (ui->ui_tstate & MD_ABR_CAP) { - un->un_rs_type = MD_RS_NONE; - un->c.un_status &= ~(MD_UN_OPT_NOT_DONE | MD_UN_WAR); - return; - } - - un->un_rs_dropped_lock = 1; - un->c.un_status |= MD_UN_WAR; - resync_regions = un->un_rrd_num; - dirtyregions = un->un_resync_bm; - md_unit_writerexit(ui); - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - un = (mm_unit_t *)md_unit_readerlock(ui); - - /* check to see if we've been asked to terminate */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), MD_READER_HELD)) { - if (un->c.un_status & MD_UN_RESYNC_CANCEL) - broke_out = RESYNC_ERR; - } - /* - * Check that we are still performing an optimized - * resync. If not, another node must have completed it - * so we have no more work to do. - */ - if (un->un_rs_type != old_rs_type) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - /* - * If rs_resync_done is non-zero, we must be completing an optimized - * resync that has already been partially done on another node. - * Therefore clear the bits in resync_bm for the resync regions - * already done. If resync_startbl is zero, calculate 2_do. - */ - if (un->un_rs_resync_done > 0) { - BLK_TO_RR(start_rr, un->un_resync_startbl, un); - for (rr = 0; rr < start_rr && rr < resync_regions; rr++) - CLR_KEEPDIRTY(rr, un); - } else { - un->un_rs_resync_2_do = 0; - for (rr = 0; rr < resync_regions; rr++) - if (isset(dirtyregions, rr)) - un->un_rs_resync_2_do++; - } - - for (rr = 0; (rr < resync_regions) && (broke_out != RESYNC_ERR); rr++) { - if (isset(dirtyregions, rr)) { - RR_TO_BLK(first, rr, un); - RR_TO_BLK(stopbefore, rr+1, un); - old_rs_type = un->un_rs_type; - old_rs_done = un->un_rs_resync_done; - err = resync_read_blk_range(un, first, stopbefore, - MD_READER_HELD, MD_SEND_MESS_XMIT | flags1); - flags1 = MD_RESYNC_FLAG_ERR; - - /* resync_read_blk_range releases/grabs a new lock */ - un = (mm_unit_t *)MD_UNIT(mnum); - - if (err) { - broke_out = RESYNC_ERR; - break; - } - - /* - * Check that we are still performing an optimized - * resync. If not, another node must have completed it - * so we have no more work to do. - */ - if (un->un_rs_type != old_rs_type) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - - /* - * If resync_done has increased, we must have - * blocked in resync_read_blk_range while another node - * continued with the resync. Therefore clear resync_bm - * for the blocks that have been resynced on another - * node and update rr to the next RR to be done. - */ - if (old_rs_done < un->un_rs_resync_done) { - int i; - BLK_TO_RR(start_rr, un->un_resync_startbl - 1, - un); - for (i = rr; i < start_rr; i++) - CLR_KEEPDIRTY(i, un); - rr = start_rr; - } else - un->un_rs_resync_done++; - - for (smi = 0, cnt = 0; smi < NMIRROR; smi++) - if (SUBMIRROR_IS_WRITEABLE(un, smi) && - !(SMS_BY_INDEX_IS(un, smi, SMS_ALL_ERRED))) - cnt++; - if (cnt < 2) { - broke_out = RESYNC_ERR; - break; - } - CLR_KEEPDIRTY(rr, un); - /* Check to see if we've completed the resync cleanly */ - if (un->un_rs_thread_flags & MD_RI_SHUTDOWN) - break; - - /* - * Check that we haven't exceeded un_rs_resync_2_do. If - * we have we've completed the resync. - */ - if (un->un_rs_resync_done > un->un_rs_resync_2_do) - break; - } - } - md_unit_readerexit(ui); - un = (mm_unit_t *)md_unit_writerlock(ui); - - /* - * If MN set send message to all nodes to indicate resync - * phase is complete. The processing of the message will update the - * mirror state - */ - if (MD_MNSET_SETNO(setno)) { - send_mn_resync_done_message(un, broke_out); - } else { - - if (!broke_out) - un->c.un_status &= ~MD_UN_WAR; - - un->c.un_status &= ~MD_UN_KEEP_DIRTY; - - setno = MD_UN2SET(un); - for (smi = 0; smi < NMIRROR; smi++) { - un->un_sm[smi].sm_flags &= ~MD_SM_RESYNC_TARGET; - if (SMS_BY_INDEX_IS(un, smi, SMS_OFFLINE_RESYNC)) { - state = (broke_out ? SMS_OFFLINE : SMS_RUNNING); - mirror_set_sm_state(&un->un_sm[smi], - &un->un_smic[smi], state, broke_out); - mirror_commit(un, NO_SUBMIRRORS, 0); - } - if (SMS_BY_INDEX_IS(un, smi, SMS_OFFLINE)) - un->c.un_status |= MD_UN_OFFLINE_SM; - } - } - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - if (broke_out) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } - } -} - -/* - * recalc_resync_done - * - * This function deals with a change in value of un_rs_resync_2_do in a - * component resync. This may change if we are restarting a component - * resync on a single node having rebooted with a different value of - * md_resync_bufsz or if we are running in a multi-node with nodes having - * different values of md_resync_bufsz. - * If there is a change in un_rs_resync_2_do, we need to recalculate - * the value of un_rs_resync_done given the new value for resync_2_do. - * We have to calculate a new value for resync_done to be either - * if un_resync_startbl is set, (un_resync_startbl - initblock)/(blksize + skip) - * or if it is not set, we need to calculate it from un_rs_resync_done, - * (un_rs_resync_done/un_rs_resync_2_do) * resync_2_do - * In addition we need to deal with the overflow case by using a factor to - * prevent overflow - */ - -static void -recalc_resync_done(mm_unit_t *un, size_t resync_2_do, diskaddr_t initblock, - u_longlong_t blk_size, u_longlong_t skip) -{ - diskaddr_t x; - uint_t factor = 1; - - /* - * If resync_2_do has not yet been calculated, no need to modify - * resync_done - */ - if (un->un_rs_resync_2_do == 0) { - return; - } - if (un->un_rs_resync_2_do == resync_2_do) - return; /* No change, so nothing to do */ - /* - * If un_rs_startbl is set, another node must have already started - * this resync and hence we can calculate resync_done from - * resync_startbl - */ - if (un->un_resync_startbl) { - un->un_rs_resync_done = (un->un_resync_startbl - initblock) / - (blk_size + skip); - return; - } - /* - * un_resync_startbl is not set so we must calculate it from - * un_rs_resync_done. - * If the larger of the two values of resync_2_do is greater than 32 - * bits, calculate a factor to divide by to ensure that we don't - * overflow 64 bits when calculating the new value for resync_done - */ - x = (un->un_rs_resync_2_do > resync_2_do) ? un->un_rs_resync_2_do : - resync_2_do; - while (x > INT32_MAX) { - x = x >> 1; - factor = factor << 1; - } - un->un_rs_resync_done = ((un->un_rs_resync_done/factor) * - (resync_2_do/factor)) / - ((un->un_rs_resync_2_do + (factor * factor) - 1)/ - (factor * factor)); -} - -static void -check_comp_4_resync(mm_unit_t *un, int smi, int ci) -{ - mdi_unit_t *ui; - minor_t mnum; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - size_t count; - u_longlong_t skip; - u_longlong_t size; - u_longlong_t blk_size; - diskaddr_t initblock; - diskaddr_t block; - diskaddr_t frag = 0; - md_m_shared_t *shared; - int err; - set_t setno; - int broke_out = 0; - int blks; - uint_t old_rs_type = un->un_rs_type; - diskaddr_t old_rs_done; - uint_t flags1 = MD_FIRST_RESYNC_NEXT; - diskaddr_t resync_2_do; - - mnum = MD_SID(un); - ui = MDI_UNIT(mnum); - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - setno = MD_UN2SET(un); - - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - - if (shared->ms_state != CS_RESYNC) { - SET_RS_TYPE_NONE(un->un_rs_type); - return; - } - - if (shared->ms_flags & MDM_S_RS_TRIED) { - SET_RS_TYPE_NONE(un->un_rs_type); - return; - } - - (void) (*(smic->sm_get_bcss)) - (sm->sm_dev, sm, ci, &initblock, &count, &skip, &size); - - if ((count == 1) && (skip == 0)) { - count = (size_t)(size / un->un_rs_copysize); - if ((frag = (size - (count * un->un_rs_copysize))) != 0) - count++; - size = (u_longlong_t)un->un_rs_copysize; - } - blk_size = size; /* Save block size for this resync */ - - ASSERT(count >= 1); - resync_2_do = count; - /* - * If part way through a resync, un_rs_resync_done/un_rs_resync_2_do - * gives the proportion of the resync that has already been done. - * If un_rs_copysize has changed since this previous partial resync, - * either because this node has been rebooted with a different value - * for md_resync_bufsz or because another node with a different value - * for md_resync_bufsz performed the previous resync, we need to - * recalculate un_rs_resync_done as a proportion of our value of - * resync_2_do. - */ - recalc_resync_done(un, resync_2_do, initblock, blk_size, skip); - - /* - * For MN mirrors we need to send a message to all nodes indicating - * the next region to be resynced. For a component resync, the size of - * the contiguous region that is processed by resync_read_blk_range() - * may be small if there is the interleave size. - * Therefore, rather than sending the message within - * resync_read_blk_range(), we will send a message every - * MD_DEF_RESYNC_BLK_SZ blocks. Calculate the frequency in terms of - * the number of blocks. Then, if we are restarting a resync, round - * un_rs_resync_done down to the previous resync region boundary. This - * ensures that we send a RESYNC_NEXT message before resyncing any - * blocks - */ - if (MD_MNSET_SETNO(setno)) { - blks = ((MD_DEF_RESYNC_BLK_SZ + blk_size + skip - 1)/ - (blk_size + skip)); - un->un_rs_resync_done = (un->un_rs_resync_done/blks) * blks; - } - /* - * un_rs_resync_done is the number of ('size' + 'skip') increments - * already resynced from the base 'block' - * un_rs_resync_2_do is the number of iterations in - * this component resync. - */ - ASSERT(count >= un->un_rs_resync_done); - un->un_rs_resync_2_do = (diskaddr_t)count; - - un->c.un_status |= MD_UN_WAR; - sm->sm_flags |= MD_SM_RESYNC_TARGET; - md_unit_writerexit(ui); - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - un = (mm_unit_t *)md_unit_readerlock(ui); - - /* check to see if we've been asked to terminate */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), MD_READER_HELD)) { - if (un->c.un_status & MD_UN_RESYNC_CANCEL) - broke_out = RESYNC_ERR; - } - /* - * Check that we are still performing the same component - * resync. If not, another node must have completed it - * so we have no more work to do. - */ - if (un->un_rs_type != old_rs_type) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - /* - * Adjust resync_done, resync_2_do, start of resync area and count to - * skip already resync'd data. We need to recalculate resync_done as - * we have dropped the unit lock above and may have lost ownership to - * another node, with a different resync buffer size and it may have - * sent us new values of resync_done and resync_2_do based on its - * resync buffer size - */ - recalc_resync_done(un, resync_2_do, initblock, blk_size, skip); - un->un_rs_resync_2_do = resync_2_do; - count -= un->un_rs_resync_done; - block = initblock + ((blk_size + skip) * (int)un->un_rs_resync_done); - - un->un_rs_dropped_lock = 1; - while ((count > 0) && (broke_out != RESYNC_ERR)) { - old_rs_done = un->un_rs_resync_done; - /* - * For MN mirrors send a message to the other nodes. This - * message includes the size of the region that must be blocked - * for all writes - */ - if (MD_MNSET_SETNO(setno)) { - if ((un->un_rs_resync_done%blks == 0)) { - un->un_resync_startbl = block; - send_mn_resync_next_message(un, block, - (blk_size+skip)*blks, flags1); - flags1 = 0; - /* - * check to see if we've been asked to - * terminate - */ - if (resync_kill_pending(un, - MDI_UNIT(MD_SID(un)), MD_READER_HELD)) { - if (un->c.un_status & - MD_UN_RESYNC_CANCEL) { - broke_out = RESYNC_ERR; - break; - } - } - - /* - * Check that we are still performing the same - * component resync. If not, another node must - * have completed it so we have no more work to - * do. Also reset count to remaining resync as - * we may have lost ownership in in - * send_mn_resync_next_message while another - * node continued with the resync and - * incremented resync_done. - */ - if (un->un_rs_type != old_rs_type) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - /* - * recalculate resync_done, resync_2_do - * We need to recalculate resync_done as - * we have dropped the unit lock in - * send_mn_resync_next_message above and may - * have lost ownership to another node, with a - * different resync buffer size and it may have - * sent us new values of resync_done and - * resync_2_do based on its resync buffer size - */ - recalc_resync_done(un, resync_2_do, initblock, - blk_size, skip); - un->un_rs_resync_2_do = resync_2_do; - count = un->un_rs_resync_2_do - - un->un_rs_resync_done; - /* - * Adjust start of resync area to skip already - * resync'd data - */ - block = initblock + ((blk_size + skip) * - (int)un->un_rs_resync_done); - old_rs_done = un->un_rs_resync_done; - } - } - err = resync_read_blk_range(un, block, block + size, - MD_READER_HELD, MD_RESYNC_FLAG_ERR); - - /* resync_read_blk_range releases/grabs a new lock */ - un = (mm_unit_t *)MD_UNIT(mnum); - - if (err) { - broke_out = RESYNC_ERR; - break; - } - /* - * If we are no longer resyncing this component, return as - * another node has progressed the resync. - */ - if (un->un_rs_type != old_rs_type) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - - /* - * recalculate resync_done, resync_2_do. We need to recalculate - * resync_done as we have dropped the unit lock in - * resync_read_blk_range above and may have lost ownership to - * another node, with a different resync buffer size and it may - * have sent us new values of resync_done and resync_2_do based - * on its resync buffer size - */ - recalc_resync_done(un, resync_2_do, initblock, blk_size, skip); - un->un_rs_resync_2_do = resync_2_do; - - /* - * Reset count to remaining resync as we may have blocked in - * resync_read_blk_range while another node continued - * with the resync and incremented resync_done. Also adjust - * start of resync area to skip already resync'd data. - */ - count = un->un_rs_resync_2_do - un->un_rs_resync_done; - block = initblock +((blk_size + skip) * - (int)un->un_rs_resync_done); - - /* - * If we are picking up from another node, we retry the last - * block otherwise step on to the next block - */ - if (old_rs_done == un->un_rs_resync_done) { - block += blk_size + skip; - un->un_rs_resync_done++; - count--; - } - - if ((count == 1) && frag) - size = frag; - if (shared->ms_state == CS_ERRED) { - err = 1; - broke_out = RESYNC_ERR; - break; - } - - /* Check to see if we've completed the resync cleanly */ - if (un->un_rs_thread_flags & MD_RI_SHUTDOWN) - break; - } - - md_unit_readerexit(ui); - un = (mm_unit_t *)md_unit_writerlock(ui); - - /* - * If MN set send message to all nodes to indicate resync - * phase is complete. The processing of the message will update the - * mirror state - */ - if (MD_MNSET_SETNO(setno)) { - send_mn_resync_done_message(un, broke_out); - } else { - un->c.un_status &= ~MD_UN_WAR; - sm->sm_flags &= ~MD_SM_RESYNC_TARGET; - - if (err) - shared->ms_flags |= MDM_S_RS_TRIED; - else - /* - * As we don't transmit the changes, - * no need to drop the lock. - */ - set_sm_comp_state(un, smi, ci, CS_OKAY, 0, - MD_STATE_NO_XMIT, (IOLOCK *)NULL); - } - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - if (broke_out) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - SET_RS_TYPE_NONE(un->un_rs_type); - } -} - -static void -submirror_resync(mm_unit_t *un) -{ - mdi_unit_t *ui; - minor_t mnum; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int smi; - diskaddr_t chunk; - diskaddr_t curblk; - int err; - int cnt; - set_t setno; - int broke_out = 0; - int i; - int flags1 = MD_FIRST_RESYNC_NEXT; - int compcnt; - - mnum = MD_SID(un); - ui = MDI_UNIT(mnum); - setno = MD_UN2SET(un); - - /* - * If the submirror_index is non-zero, we are continuing a resync - * so restart resync from last submirror marked as being resynced. - */ - if (RS_SMI(un->un_rs_type) != 0) { - smi = RS_SMI(un->un_rs_type); - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (!SMS_IS(sm, SMS_ATTACHED_RESYNC)) { - for (smi = 0; smi < NMIRROR; smi++) { - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (SMS_IS(sm, SMS_ATTACHED_RESYNC)) - break; - } - } - } else { - for (smi = 0; smi < NMIRROR; smi++) { - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (SMS_IS(sm, SMS_ATTACHED_RESYNC)) - break; - } - } - if (smi == NMIRROR) { - SET_RS_TYPE_NONE(un->un_rs_type); - return; - } - - /* - * If we've only got one component we can fail on a resync write - * if an error is encountered. This stops an unnecessary read of the - * whole mirror on a target write error. - */ - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm); - if (compcnt == 1) - flags1 |= MD_RESYNC_FLAG_ERR; - - un->c.un_status |= MD_UN_WAR; - sm->sm_flags |= MD_SM_RESYNC_TARGET; - SET_RS_SMI(un->un_rs_type, smi); - md_unit_writerexit(ui); - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - un = (mm_unit_t *)md_unit_readerlock(ui); - - un->un_rs_dropped_lock = 1; - - /* check to see if we've been asked to terminate */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), MD_READER_HELD)) { - if (un->c.un_status & MD_UN_RESYNC_CANCEL) - broke_out = RESYNC_ERR; - } - /* - * Check that we are still performing the same submirror - * resync. If not, another node must have completed it - * so we have no more work to do. - */ - if (RS_TYPE(un->un_rs_type) != MD_RS_SUBMIRROR) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - - /* if > 1TB mirror, increase percent done granularity */ - if (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) - chunk = un->c.un_total_blocks / 1000; - else - chunk = un->c.un_total_blocks / 100; - if (chunk == 0) - chunk = un->c.un_total_blocks; - /* - * If a MN set, round the chunk size up to a multiple of - * MD_DEF_RESYNC_BLK_SZ - */ - if (MD_MNSET_SETNO(setno)) { - chunk = ((chunk + MD_DEF_RESYNC_BLK_SZ)/MD_DEF_RESYNC_BLK_SZ) - * MD_DEF_RESYNC_BLK_SZ; - if (chunk > un->c.un_total_blocks) - chunk = un->c.un_total_blocks; - } - /* - * Handle restartable resyncs that continue from where the previous - * resync left off. The new resync range is from un_rs_resync_done .. - * un_rs_resync_2_do - */ - curblk = 0; - if (un->un_rs_resync_done == 0) { - un->un_rs_resync_2_do = un->c.un_total_blocks; - } else { - curblk = un->un_rs_resync_done; - } - while ((curblk != un->c.un_total_blocks) && (broke_out != RESYNC_ERR)) { - diskaddr_t rs_done; - - rs_done = un->un_rs_resync_done; - err = resync_read_blk_range(un, curblk, curblk + chunk, - MD_READER_HELD, MD_SEND_MESS_XMIT | flags1); - flags1 = (compcnt == 1 ? MD_RESYNC_FLAG_ERR : 0); - - /* resync_read_blk_range releases/grabs a new lock */ - un = (mm_unit_t *)MD_UNIT(mnum); - - if (err) { - broke_out = RESYNC_ERR; - break; - } - - /* - * If we are no longer executing a submirror resync, return - * as another node has completed the submirror resync. - */ - if (RS_TYPE(un->un_rs_type) != MD_RS_SUBMIRROR) { - md_unit_readerexit(ui); - (void) md_unit_writerlock(ui); - return; - } - /* - * If resync_done has changed, we must have blocked - * in resync_read_blk_range while another node - * continued with the resync so restart from resync_done. - */ - if (rs_done != un->un_rs_resync_done) { - curblk = un->un_rs_resync_done; - } else { - curblk += chunk; - un->un_rs_resync_done = curblk; - } - - if ((curblk + chunk) > un->c.un_total_blocks) - chunk = un->c.un_total_blocks - curblk; - for (i = 0, cnt = 0; i < NMIRROR; i++) - if (SUBMIRROR_IS_WRITEABLE(un, i) && - !SMS_BY_INDEX_IS(un, i, SMS_ALL_ERRED) && - (un->un_sm[i].sm_flags & MD_SM_RESYNC_TARGET)) - cnt++; - if (cnt == 0) { - broke_out = RESYNC_ERR; - break; - } - - /* Check to see if we've completed the resync cleanly */ - if (un->un_rs_thread_flags & MD_RI_SHUTDOWN) - break; - } - md_unit_readerexit(ui); - un = (mm_unit_t *)md_unit_writerlock(ui); - - /* - * If MN set send message to all nodes to indicate resync - * phase is complete. The processing of the message will update the - * mirror state - */ - if (MD_MNSET_SETNO(setno)) { - send_mn_resync_done_message(un, broke_out); - } else { - sm->sm_flags &= ~MD_SM_RESYNC_TARGET; - if (err) { - mirror_set_sm_state(sm, smic, SMS_ATTACHED, 1); - } else { - mirror_set_sm_state(sm, smic, SMS_RUNNING, 0); - } - un->c.un_status &= ~MD_UN_WAR; - mirror_commit(un, SMI2BIT(smi), 0); - } - - /* For MN sets, resync NOTIFY is done when processing resync messages */ - if (!MD_MNSET_SETNO(setno)) { - if (broke_out) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - } -} - -static void -component_resync(mm_unit_t *un) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int ci; - int i; - int compcnt; - - /* - * Handle the case where we are picking up a partially complete - * component resync. In this case un_rs_type contains the submirror - * and component index of where we should restart the resync. - */ - while (un->un_rs_type != MD_RS_COMPONENT) { - i = RS_SMI(un->un_rs_type); - ci = RS_CI(un->un_rs_type); - check_comp_4_resync(un, i, ci); - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), - MD_WRITER_HELD)) - return; - /* - * If we have no current resync, contine to scan submirror and - * components. If the resync has moved on to another component, - * restart it and if the resync is no longer a component - * resync, just exit - */ - if (RS_TYPE(un->un_rs_type) == MD_RS_NONE) - break; - if (RS_TYPE(un->un_rs_type) != MD_RS_COMPONENT) - return; - } - /* Now continue scanning _all_ submirrors and components */ - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - if (!SMS_IS(sm, SMS_RUNNING | SMS_LIMPING)) - continue; - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm); - for (ci = 0; ci < compcnt; ci++) { - SET_RS_SMI(un->un_rs_type, i); - SET_RS_CI(un->un_rs_type, ci); - SET_RS_TYPE(un->un_rs_type, MD_RS_COMPONENT); - check_comp_4_resync(un, i, ci); - /* Bail out if we've been asked to abort/shutdown */ - if (resync_kill_pending(un, MDI_UNIT(MD_SID(un)), - MD_WRITER_HELD)) - return; - /* - * Now check if another node has continued with the - * resync, if we are no longer in component resync, - * exit, otherwise update to the current component - 1 - * so that the next call of check_comp_4 resync() will - * resync the current component. - */ - if ((RS_TYPE(un->un_rs_type) != MD_RS_NONE) && - (RS_TYPE(un->un_rs_type) != MD_RS_COMPONENT)) - return; - else { - if (RS_SMI(un->un_rs_type) != i) { - i = RS_SMI(un->un_rs_type); - ci = RS_CI(un->un_rs_type) - 1; - } else if (RS_CI(un->un_rs_type) != ci) - ci = RS_CI(un->un_rs_type) - 1; - } - } - } -} - -static void -reset_comp_flags(mm_unit_t *un) -{ - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - md_m_shared_t *shared; - int ci; - int i; - int compcnt; - - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - smic = &un->un_smic[i]; - if (!SMS_IS(sm, SMS_INUSE)) - continue; - compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm); - for (ci = 0; ci < compcnt; ci++) { - shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) - (sm->sm_dev, sm, ci); - shared->ms_flags &= ~MDM_S_RS_TRIED; - } - } -} - -/* - * resync_progress_thread: - * ---------------------- - * Thread started on first resync of a unit which simply blocks until woken up - * by a cv_signal, and then updates the mddb for the mirror unit record. This - * saves the resync progress information (un_rs_resync_done, un_rs_resync_2_do) - * so that an aborted resync can be continued after an intervening reboot. - */ -static void -resync_progress_thread(minor_t mnum) -{ - mm_unit_t *un = MD_UNIT(mnum); - mdi_unit_t *ui = MDI_UNIT(mnum); - set_t setno = MD_MIN2SET(mnum); - - while (un->c.un_status & MD_UN_RESYNC_ACTIVE) { - mutex_enter(&un->un_rs_progress_mx); - cv_wait(&un->un_rs_progress_cv, &un->un_rs_progress_mx); - mutex_exit(&un->un_rs_progress_mx); - if (un->un_rs_progress_flags & MD_RI_KILL) - break; - - /* - * Commit mirror unit if we're the Master node in a multi-node - * environment - */ - if (MD_MNSET_SETNO(setno) && md_set[setno].s_am_i_master) { - (void) md_unit_readerlock(ui); - mirror_commit(un, NO_SUBMIRRORS, 0); - md_unit_readerexit(ui); - } - } - thread_exit(); -} - -/* - * resync_progress: - * --------------- - * Timeout handler for updating the progress of the resync thread. - * Simply wake up the resync progress daemon which will then mirror_commit() the - * unit structure to the mddb. This snapshots the current progress of the resync - */ -static void -resync_progress(void *arg) -{ - mm_unit_t *un = (mm_unit_t *)arg; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - uint_t active; - - mutex_enter(&un->un_rs_progress_mx); - cv_signal(&un->un_rs_progress_cv); - mutex_exit(&un->un_rs_progress_mx); - - /* schedule the next timeout if the resync is still marked active */ - (void) md_unit_readerlock(ui); - active = un->c.un_status & MD_UN_RESYNC_ACTIVE ? 1 : 0; - md_unit_readerexit(ui); - if (active) { - un->un_rs_resync_to_id = timeout(resync_progress, un, - (clock_t)(drv_usectohz(60000000) * - md_mirror_resync_update_intvl)); - } -} - -/* - * resync_unit: - * ----------- - * Resync thread which drives all forms of resync (optimized, component, - * submirror). Must handle thread suspension and kill to allow multi-node - * resync to run without undue ownership changes. - * - * For a MN set, the reync mechanism is as follows: - * - * When a resync is started, either via metattach, metaonline, metareplace, - * metasync or by a hotspare kicking in, a message is sent to all nodes, which - * calls mirror_resync_thread. If there is currently no mirror owner, the - * master node sends a CHOOSE_OWNER message to the handler on the master. This - * chooses a mirror owner and sends a CHANGE_OWNER message requesting the - * selected node to become the owner. - * If this node is not the owner it sets itself to block in resync_kill_pending - * and if there is no owner all nodes will block until the chosen owner is - * selected, in which case it will unblock itself. So, on entry to this - * function only one node will continue past resync_kill_pending(). - * Once the resync thread is started, it basically cycles through the optimized, - * component and submirrors resyncs until there is no more work to do. - * - * For an ABR mirror, once a mirror owner is chosen it will complete the resync - * unless the nodes dies in which case a new owner will be chosen and it will - * have to complete the resync from the point at which the previous owner died. - * To do this we broadcast a RESYNC_NEXT message before each region to be - * resynced and this message contains the address and length of the region - * being resynced and the current progress through the resync. The size of - * this region is MD_DEF_RESYNC_BLK_SZ blocks. It is larger than the resync - * block size to limit the amount of inter node traffic. The RESYNC_NEXT - * message also indicates to all other nodes that all writes to this block - * must be blocked until the next RESYNC_NEXT message is received. This ensures - * that no node can write to a block that is being resynced. For all MN - * mirrors we also block the whole resync region on the resync owner node so - * that all writes to the resync region are blocked on all nodes. There is a - * difference here between a MN set and a regular set in that for a MN set - * we protect the mirror from writes to the current resync block by blocking - * a larger region. For a regular set we just block writes to the current - * resync block. - * - * For a non-ABR mirror the same RESYNC_NEXT message is sent with an - * additional purpose. In this case, there is only one mirror owner at a time - * and rather than continually switching ownership between the chosen mirror - * owner and the node that is writing to the mirror, we move the resync to the - * mirror owner. When we swich ownership, we block the old owner and unblock - * the resync thread on the new owner. To enable the new owner to continue the - * resync, all nodes need to have the latest resync status, Then, following each - * resync write, we check to see if the resync state has changed and if it - * has this must be because we have lost ownership to another node(s) for a - * period and then have become owner again later in the resync process. If we - * are still dealing with the same resync, we just adjust addresses and counts - * and then continue. If the resync has moved on to a different type, for - * example from an optimized to a submirror resync, we move on to process the - * resync described by rs_type and continue from the position described by - * resync_done and resync_startbl. - * - * Note that for non-ABR mirrors it is possible for a write to be made on a - * non resync-owner node without a change of ownership. This is the case when - * the mirror has a soft part created on it and a write in ABR mode is made - * to that soft part. Therefore we still need to block writes to the resync - * region on all nodes. - * - * Sending the latest resync state to all nodes also enables them to continue - * a resync in the event that the mirror owner dies. If a mirror owner for - * a non-ABR mirror has died, there will be dirty resync regions. Therefore, - * regardless of whether another type of resync was in progress, we must first - * do an optimized resync to clean up the dirty regions before continuing - * with the interrupted resync. - * - * The resync status is held in the unit structure - * On disk - * un_rs_resync_done The number of contiguous resyc blocks done so far - * un_rs_resync_2_do The total number of contiguous resync blocks - * un_rs_type The resync type (inc submirror and component numbers) - * In core - * un_resync_startbl The address of the current resync block being processed - * - * In the event that the whole cluster fails we need to just use - * un_rs_resync_done to restart the resync and to ensure that this is - * periodically written to disk, we have a thread which writes the record - * to disk every 5 minutes. As the granularity of un_rs_resync_done is - * usually coarse ( for an optimized resync 1001 is the max value) there is - * little point in writing this more frequently. - */ -static void -resync_unit(minor_t mnum) -{ - mdi_unit_t *ui; - mm_unit_t *un; - md_error_t mde = mdnullerror; - int mn_resync = 0; - int resync_finish = 0; - set_t setno = MD_MIN2SET(mnum); - uint_t old_rs_type = MD_RS_NONE; - uint_t old_rs_done = 0, old_rs_2_do = 0; - uint_t old_rs_startbl = 0; - int block_resync = 1; - char cpr_name[23]; /* Unique CPR name */ - int rs_copysize; - char *rs_buffer; - int nretries = 0; - -resync_restart: -#ifdef DEBUG - if (mirror_debug_flag) - printf("Resync started (mnum = %x)\n", mnum); -#endif - /* - * increment the mirror resync count - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_mirror_resync++; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - ui = MDI_UNIT(mnum); - un = MD_UNIT(mnum); - - rs_copysize = un->un_rs_copysize; - if (rs_copysize == 0) { - /* - * Don't allow buffer size to fall outside the - * range 0 < bufsize <= md_max_xfer_bufsz. - */ - if (md_resync_bufsz <= 0) - md_resync_bufsz = MD_DEF_RESYNC_BUF_SIZE; - rs_copysize = MIN(md_resync_bufsz, md_max_xfer_bufsz); - } - rs_buffer = kmem_zalloc(dbtob(rs_copysize), KM_SLEEP); - un = md_unit_writerlock(ui); - un->un_rs_copysize = rs_copysize; - un->un_rs_buffer = rs_buffer; - - if (MD_MNSET_SETNO(setno)) { - /* - * Register this resync thread with the CPR mechanism. This - * allows us to detect when the system is suspended and so - * keep track of the RPC failure condition. - */ - (void) snprintf(cpr_name, sizeof (cpr_name), - "mirror_resync%x", mnum); - CALLB_CPR_INIT(&un->un_rs_cprinfo, &un->un_rs_cpr_mx, - callb_md_mrs_cpr, cpr_name); - - if (ui->ui_tstate & MD_RESYNC_NOT_DONE) { - /* - * If this is the first resync following the initial - * snarf (MD_RESYNC_NOT_DONE still set) and we've - * been started outside a reconfig step (e.g. by being - * added to an existing set) we need to query the - * existing submirror state for this mirror. - * The set_status flags will have MD_MN_SET_MIR_STATE_RC - * set if we've been through a step4 reconfig, so only - * query the master if this isn't (yet) set. In this - * case we must continue the resync thread as there is - * not guaranteed to be a currently running resync on - * any of the other nodes. Worst case is that we will - * initiate an ownership change to this node and then - * find that there is no resync to perform. However, we - * will then have correct status across the cluster. - */ - if (!md_set[setno].s_am_i_master) { - if (!(md_get_setstatus(setno) & - MD_SET_MN_MIR_STATE_RC)) { - mirror_get_status(un, NULL); - block_resync = 0; -#ifdef DEBUG - if (mirror_debug_flag) { - mm_submirror_t *sm; - int i; - for (i = 0; i < NMIRROR; i++) { - sm = &un->un_sm[i]; - printf( - "sm[%d] state=%4x" - " flags=%4x\n", i, - sm->sm_state, - sm->sm_flags); - } - } -#endif - } - } - ui->ui_tstate &= ~MD_RESYNC_NOT_DONE; - } - /* - * For MN set, if we have an owner, then start the resync on it. - * If there is no owner the master must send a message to - * choose the owner. This message will contain the current - * resync count and it will only be sent to the master, where - * the resync count will be used to choose the next node to - * perform a resync, by cycling through the nodes in the set. - * The message handler will then send a CHANGE_OWNER message to - * all nodes, and on receipt of that message, the chosen owner - * will issue a SET_OWNER ioctl to become the owner. This ioctl - * will be requested to spawn a thread to issue the - * REQUEST_OWNER message to become the owner which avoids the - * need for concurrent ioctl requests. - * After sending the message, we will block waiting for one - * of the nodes to become the owner and start the resync - */ - if (MD_MN_NO_MIRROR_OWNER(un)) { - /* - * There is no owner, block and then the master will - * choose the owner. Only perform this if 'block_resync' - * is set. - */ - if (block_resync) { - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER; - mutex_exit(&un->un_rs_thread_mx); - } - if (md_set[setno].s_am_i_master) { - md_unit_writerexit(ui); - (void) mirror_choose_owner(un, NULL); - (void) md_unit_writerlock(ui); - } - } else { - /* There is an owner, block if we are not it */ - if (!MD_MN_MIRROR_OWNER(un)) { - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER; - mutex_exit(&un->un_rs_thread_mx); - } - } - } - /* - * Start a timeout chain to update the resync progress to the mddb. - * This will run every md_mirror_resync_update_intvl minutes and allows - * a resync to be continued over a reboot. - */ - ASSERT(un->un_rs_resync_to_id == 0); - un->un_rs_resync_to_id = timeout(resync_progress, un, - (clock_t)(drv_usectohz(60000000) * md_mirror_resync_update_intvl)); - - /* - * Handle resync restart from the last logged position. The contents - * of un_rs_resync_2_do and un_rs_resync_done are dependent on the - * type of resync that was in progress. - */ - if (MD_MNSET_SETNO(setno)) { - switch ((uint_t)RS_TYPE(un->un_rs_type)) { - case MD_RS_NONE: - case MD_RS_OPTIMIZED: - case MD_RS_COMPONENT: - case MD_RS_SUBMIRROR: - case MD_RS_ABR: - break; - default: - un->un_rs_type = MD_RS_NONE; - } - /* Allocate a resync message, if required */ - if (un->un_rs_msg == NULL) { - un->un_rs_msg = (md_mn_msg_resync_t *)kmem_zalloc( - sizeof (md_mn_msg_resync_t), KM_SLEEP); - } - mn_resync = 1; - } - - /* Check to see if we've been requested to block/kill */ - if (resync_kill_pending(un, ui, MD_WRITER_HELD)) { - goto bail_out; - } - - do { - un->un_rs_dropped_lock = 0; - /* - * Always perform an optimized resync first as this will bring - * the mirror into an available state in the shortest time. - * If we are resuming an interrupted resync, other than an - * optimized resync, we save the type and amount done so that - * we can resume the appropriate resync after the optimized - * resync has completed. - */ - if ((RS_TYPE(un->un_rs_type) != MD_RS_NONE) && - (RS_TYPE(un->un_rs_type) != MD_RS_OPTIMIZED)) { - old_rs_type = un->un_rs_type; - old_rs_done = un->un_rs_resync_done; - old_rs_2_do = un->un_rs_resync_2_do; - old_rs_startbl = un->un_resync_startbl; - } - SET_RS_TYPE(un->un_rs_type, MD_RS_OPTIMIZED); - /* - * If we are continuing a resync that is not an - * OPTIMIZED one, then we start from the beginning when - * doing this optimized resync - */ - if (RS_TYPE(old_rs_type) != MD_RS_OPTIMIZED) { - un->un_rs_resync_done = 0; - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - optimized_resync(un); - /* Check to see if we've been requested to block/kill */ - if (resync_kill_pending(un, ui, MD_WRITER_HELD)) { - goto bail_out; - } - un = (mm_unit_t *)MD_UNIT(mnum); - /* - * If another node has moved the resync on, we must - * restart the correct resync - */ - if (mn_resync && - (RS_TYPE(un->un_rs_type) != MD_RS_NONE)) { - old_rs_type = un->un_rs_type; - old_rs_done = un->un_rs_resync_done; - old_rs_2_do = un->un_rs_resync_2_do; - old_rs_startbl = un->un_resync_startbl; - } - - /* - * Restore previous resync progress or move onto a - * component resync. - */ - if (RS_TYPE(old_rs_type) != MD_RS_NONE) { - un->un_rs_type = old_rs_type; - un->un_rs_resync_done = old_rs_done; - un->un_rs_resync_2_do = old_rs_2_do; - un->un_resync_startbl = old_rs_startbl; - } else { - un->un_rs_type = MD_RS_COMPONENT; - un->un_rs_resync_done = 0; - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - - if (RS_TYPE(un->un_rs_type) == MD_RS_COMPONENT) { - component_resync(un); - /* Check to see if we've been requested to block/kill */ - if (resync_kill_pending(un, ui, MD_WRITER_HELD)) { - goto bail_out; - } - un = (mm_unit_t *)MD_UNIT(mnum); - /* - * If we have moved on from a component resync, another - * node must have completed it and started a submirror - * resync, so leave the resync state alone. For non - * multi-node sets we move onto the submirror resync. - */ - if (mn_resync) { - if (RS_TYPE(un->un_rs_type) == MD_RS_NONE) { - un->un_rs_type = MD_RS_SUBMIRROR; - un->un_rs_resync_done = - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - } else { - un->un_rs_type = MD_RS_SUBMIRROR; - un->un_rs_resync_done = 0; - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - } - if (RS_TYPE(un->un_rs_type) == MD_RS_SUBMIRROR) { - submirror_resync(un); - /* Check to see if we've been requested to block/kill */ - if (resync_kill_pending(un, ui, MD_WRITER_HELD)) { - goto bail_out; - } - un = (mm_unit_t *)MD_UNIT(mnum); - /* - * If we have moved on from a submirror resync, another - * node must have completed it and started a different - * resync, so leave the resync state alone - */ - if (mn_resync) { - if (RS_TYPE(un->un_rs_type) == MD_RS_NONE) { - un->un_rs_resync_done = - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - } else { - /* If non-MN mirror, reinitialize state */ - un->un_rs_type = MD_RS_NONE; - un->un_rs_resync_done = 0; - un->un_rs_resync_2_do = 0; - un->un_resync_startbl = 0; - } - } - } while (un->un_rs_dropped_lock); - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_SHUTDOWN; - mutex_exit(&un->un_rs_thread_mx); - - resync_finish = 1; -bail_out: -#ifdef DEBUG - if (mirror_debug_flag) - printf("Resync stopped (mnum = %x), resync_finish = %d\n", - mnum, resync_finish); -#endif - kmem_free(un->un_rs_buffer, dbtob(un->un_rs_copysize)); - - mutex_enter(&un->un_rs_progress_mx); - un->un_rs_progress_flags |= MD_RI_KILL; - cv_signal(&un->un_rs_progress_cv); - mutex_exit(&un->un_rs_progress_mx); - - /* - * For MN Set, send a RESYNC_FINISH if this node completed the resync. - * There is no need to grow unit here, it will be done in the - * handler for the RESYNC_FINISH message together with resetting - * MD_UN_RESYNC_ACTIVE. - */ - if (mn_resync) { - if (resync_finish) { - /* - * Normal resync completion. Issue a RESYNC_FINISH - * message if we're part of a multi-node set. - */ - md_mn_kresult_t *kres; - md_mn_msg_resync_t *rmsg; - int rval; - - rmsg = (md_mn_msg_resync_t *)un->un_rs_msg; - md_unit_writerexit(ui); - - rmsg->msg_resync_mnum = mnum; - rmsg->msg_resync_type = 0; - rmsg->msg_resync_done = 0; - rmsg->msg_resync_2_do = 0; - rmsg->msg_originator = md_mn_mynode_id; - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -smrf_msg: - mutex_enter(&un->un_rs_cpr_mx); - CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo); - - rval = mdmn_ksend_message(setno, - MD_MN_MSG_RESYNC_FINISH, MD_MSGF_NO_LOG, 0, - (char *)rmsg, sizeof (md_mn_msg_resync_t), kres); - - CALLB_CPR_SAFE_END(&un->un_rs_cprinfo, - &un->un_rs_cpr_mx); - mutex_exit(&un->un_rs_cpr_mx); - - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - mdmn_ksend_show_error(rval, kres, - "RESYNC_FINISH"); - /* If we're shutting down, pause things here. */ - if (kres->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd is now available again. Retry - * the message once. If this fails we - * panic as the system is in an - * unexpected state. - */ - if (nretries++ == 0) - goto smrf_msg; - } - cmn_err(CE_PANIC, - "ksend_message failure: RESYNC_FINISH"); - } - kmem_free(kres, sizeof (md_mn_kresult_t)); - (void) md_unit_writerlock(ui); - } - /* - * If the resync has been cancelled, clear flags, reset owner - * for ABR mirror and release the resync region parent - * structure. - */ - if (un->c.un_status & MD_UN_RESYNC_CANCEL) { - md_mps_t *ps; - - if (ui->ui_tstate & MD_ABR_CAP) { - /* Resync finished, if ABR set owner to NULL */ - mutex_enter(&un->un_owner_mx); - un->un_mirror_owner = 0; - mutex_exit(&un->un_owner_mx); - } - - un->c.un_status &= ~(MD_UN_RESYNC_CANCEL | - MD_UN_RESYNC_ACTIVE); - ps = un->un_rs_prev_overlap; - if (ps != NULL) { - /* Remove previous overlap resync region */ - if (ps->ps_flags & MD_MPS_ON_OVERLAP) - mirror_overlap_tree_remove(ps); - /* - * Release the overlap range reference - */ - un->un_rs_prev_overlap = NULL; - kmem_cache_free(mirror_parent_cache, - ps); - } - } - - /* - * Release resync message buffer. This will be reallocated on - * the next invocation of the resync_unit thread. - */ - if (un->un_rs_msg) { - kmem_free(un->un_rs_msg, sizeof (md_mn_msg_resync_t)); - un->un_rs_msg = NULL; - } - } else { - /* For non-MN sets deal with any pending grows */ - un->c.un_status &= ~MD_UN_RESYNC_ACTIVE; - if (un->c.un_status & MD_UN_GROW_PENDING) { - if ((mirror_grow_unit(un, &mde) != 0) || - (! mdismderror(&mde, MDE_GROW_DELAYED))) { - un->c.un_status &= ~MD_UN_GROW_PENDING; - } - } - } - - reset_comp_flags(un); - un->un_resync_completed = 0; - mirror_commit(un, NO_SUBMIRRORS, 0); - md_unit_writerexit(ui); - - /* - * Stop the resync progress thread. - */ - if (un->un_rs_resync_to_id != 0) { - (void) untimeout(un->un_rs_resync_to_id); - un->un_rs_resync_to_id = 0; - } - - /* - * Calling mirror_internal_close() makes further reference to un / ui - * dangerous. If we are the only consumer of the mirror it is possible - * for a metaclear to be processed after completion of the m_i_c() - * routine. As we need to handle the case where another resync has been - * scheduled for the mirror, we raise the open count on the device - * which protects against the close / metaclear / lock => panic scenario - */ - (void) md_unit_incopen(MD_SID(un), FREAD|FWRITE, OTYP_LYR); - (void) mirror_internal_close(MD_SID(un), OTYP_LYR, 0, (IOLOCK *)NULL); - - /* - * deccrement the mirror resync count - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_mirror_resync--; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - /* - * Remove the thread reference as we're about to exit. This allows a - * subsequent mirror_resync_unit() to start a new thread. - * If RESYNC_ACTIVE is set, mirror_resync_unit() must have been - * called to start a new resync, so reopen the mirror and go back to - * the start. - */ - (void) md_unit_writerlock(ui); - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~(MD_RI_KILL|MD_RI_SHUTDOWN); - mutex_exit(&un->un_rs_thread_mx); - if (un->c.un_status & MD_UN_RESYNC_ACTIVE) { - md_unit_writerexit(ui); - if (mirror_internal_open(MD_SID(un), (FREAD|FWRITE), - OTYP_LYR, 0, (IOLOCK *)NULL) == 0) { - /* Release the reference grabbed above */ - (void) mirror_internal_close(MD_SID(un), OTYP_LYR, 0, - (IOLOCK *)NULL); - goto resync_restart; - } - (void) md_unit_writerlock(ui); - cmn_err(CE_NOTE, - "Could not open metadevice (%x) for resync\n", - MD_SID(un)); - } - un->un_rs_thread = NULL; - md_unit_writerexit(ui); - - /* - * Check for hotspares once we've cleared the resync thread reference. - * If there are any errored units a poke_hotspares() will result in - * a call to mirror_resync_unit() which we need to allow to start. - */ - (void) poke_hotspares(); - - /* - * Remove this thread from the CPR callback table. - */ - if (mn_resync) { - mutex_enter(&un->un_rs_cpr_mx); - CALLB_CPR_EXIT(&un->un_rs_cprinfo); - } - - /* - * Remove the extra reference to the unit we generated above. After - * this call it is *unsafe* to reference either ui or un as they may - * no longer be allocated. - */ - (void) mirror_internal_close(MD_SID(un), OTYP_LYR, 0, (IOLOCK *)NULL); - - thread_exit(); -} - -/* - * mirror_resync_unit: - * ------------------ - * Start a resync for the given mirror metadevice. Save the resync thread ID in - * un->un_rs_thread for later manipulation. - * - * Returns: - * 0 Success - * !=0 Error - */ -/*ARGSUSED*/ -int -mirror_resync_unit( - minor_t mnum, - md_resync_ioctl_t *ri, - md_error_t *ep, - IOLOCK *lockp -) -{ - mdi_unit_t *ui; - mm_unit_t *un; - set_t setno = MD_MIN2SET(mnum); - - ui = MDI_UNIT(mnum); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(ep, MDE_DB_STALE, mnum, setno)); - - if (mirror_internal_open(mnum, (FREAD|FWRITE), OTYP_LYR, 0, lockp)) { - return (mdmderror(ep, MDE_MIRROR_OPEN_FAILURE, mnum)); - } - if (lockp) { - un = (mm_unit_t *)md_ioctl_writerlock(lockp, ui); - } else { - un = (mm_unit_t *)md_unit_writerlock(ui); - } - - /* - * Check to see if we're attempting to start a resync while one is - * already running. - */ - if (un->c.un_status & MD_UN_RESYNC_ACTIVE || - un->un_rs_thread != NULL) { - /* - * Ensure RESYNC_ACTIVE set, it may not be if the resync thread - * is in the process of terminating, setting the flag will - * cause the resync thread to return to the beginning - */ - un->c.un_status |= MD_UN_RESYNC_ACTIVE; - if (lockp) { - md_ioctl_writerexit(lockp); - } else { - md_unit_writerexit(ui); - } - (void) mirror_internal_close(mnum, OTYP_LYR, 0, lockp); - return (0); - } - un->c.un_status |= MD_UN_RESYNC_ACTIVE; - un->c.un_status &= ~MD_UN_RESYNC_CANCEL; - if ((ri) && (ri->ri_copysize > 0) && - (ri->ri_copysize <= md_max_xfer_bufsz)) - un->un_rs_copysize = ri->ri_copysize; - else - un->un_rs_copysize = 0; - - /* Start the resync progress thread off */ - un->un_rs_progress_flags = 0; - (void) thread_create(NULL, 0, resync_progress_thread, - (caddr_t)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri); - - /* - * We have to store the thread ID in the unit structure so do not - * drop writerlock until the thread is active. This means resync_unit - * may spin on its first md_unit_readerlock(), but deadlock won't occur. - */ - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~(MD_RI_KILL|MD_RI_SHUTDOWN); - mutex_exit(&un->un_rs_thread_mx); - un->un_rs_thread = thread_create(NULL, 0, resync_unit, - (caddr_t)(uintptr_t)mnum, 0, &p0, TS_RUN, 60); - if (un->un_rs_thread == (kthread_id_t)NULL) { - un->c.un_status &= ~MD_UN_RESYNC_ACTIVE; - if (lockp) { - md_ioctl_writerexit(lockp); - } else { - md_unit_writerexit(ui); - } - (void) mirror_internal_close(mnum, OTYP_LYR, 0, lockp); - return (mdmderror(ep, MDE_MIRROR_THREAD_FAILURE, mnum)); - } else { - if (lockp) { - md_ioctl_writerexit(lockp); - } else { - md_unit_writerexit(ui); - } - } - - return (0); -} - -/* - * mirror_ioctl_resync: - * ------------------- - * Called as a result of an MD_IOCSETSYNC ioctl. Either start, block, unblock - * or kill the resync thread associated with the specified unit. - * Can return with locks held since mdioctl will free any locks - * that are marked in lock->l_flags. - * - * Returns: - * 0 Success - * !=0 Error Code - */ -int -mirror_ioctl_resync( - md_resync_ioctl_t *ri, - IOLOCK *lock -) -{ - minor_t mnum = ri->ri_mnum; - mm_unit_t *un; - uint_t bits; - mm_submirror_t *sm; - mm_submirror_ic_t *smic; - int smi; - kt_did_t tid; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&ri->mde); - - if ((setno >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits)) { - return (mdmderror(&ri->mde, MDE_INVAL_UNIT, mnum)); - } - - /* RD_LOCK flag grabs the md_ioctl_readerlock */ - un = mirror_getun(mnum, &ri->mde, RD_LOCK, lock); - - if (un == NULL) { - return (mdmderror(&ri->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - if (un->c.un_type != MD_METAMIRROR) { - return (mdmderror(&ri->mde, MDE_NOT_MM, mnum)); - } - if (un->un_nsm < 2) { - return (0); - } - - /* - * Determine the action to take based on the ri_flags field: - * MD_RI_BLOCK: Block current resync thread - * MD_RI_UNBLOCK: Unblock resync thread - * MD_RI_KILL: Abort resync thread - * MD_RI_RESYNC_FORCE_MNSTART: Directly start resync thread - * without using rpc.mdcommd messages. - * any other: Start resync thread - */ - switch (ri->ri_flags & (MD_RI_BLOCK|MD_RI_UNBLOCK|MD_RI_KILL)) { - - case MD_RI_BLOCK: - /* Halt resync thread by setting flag in un_rs_flags */ - if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) { - return (0); - } - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags |= MD_RI_BLOCK; - mutex_exit(&un->un_rs_thread_mx); - return (0); - - case MD_RI_UNBLOCK: - /* - * Restart resync thread by clearing flag in un_rs_flags and - * cv_signal'ing the blocked thread. - */ - if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) { - return (0); - } - mutex_enter(&un->un_rs_thread_mx); - un->un_rs_thread_flags &= ~MD_RI_BLOCK; - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - return (0); - - case MD_RI_KILL: - /* Abort resync thread. */ - if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) { - return (0); - } - mutex_enter(&un->un_rs_thread_mx); - tid = un->un_rs_thread ? (un->un_rs_thread)->t_did : 0; - un->un_rs_thread_flags &= ~(MD_RI_BLOCK|MD_RI_BLOCK_OWNER); - un->un_rs_thread_flags |= MD_RI_KILL; - cv_signal(&un->un_rs_thread_cv); - mutex_exit(&un->un_rs_thread_mx); - if (tid != 0) { - if (!(ri->ri_flags & MD_RI_NO_WAIT)) { - md_ioctl_readerexit(lock); - thread_join(tid); - un->un_rs_thread_flags &= ~MD_RI_KILL; - un->un_rs_thread = NULL; - cmn_err(CE_WARN, "md: %s: Resync cancelled\n", - md_shortname(MD_SID(un))); - } - } - return (0); - } - - md_ioctl_readerexit(lock); - - bits = 0; - for (smi = 0; smi < NMIRROR; smi++) { - sm = &un->un_sm[smi]; - smic = &un->un_smic[smi]; - if (!SMS_IS(sm, SMS_ATTACHED)) - continue; - mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1); - bits |= SMI2BIT(smi); - } - if (bits != 0) - mirror_commit(un, bits, 0); - - /* - * If we are resyncing a mirror in a MN set and the rpc.mdcommd - * can be used, we do not start the resync at this point. - * Instead, the metasync command that issued the ioctl - * will send a RESYNC_STARTING message to start the resync thread. The - * reason we do it this way is to ensure that the metasync ioctl is - * executed on all nodes before the resync thread is started. - * - * If a MN set and the MD_RI_RESYNC_FORCE_MNSTART flag is set, then - * don't use rpc.mdcommd, but just start the resync thread. This - * flag is set on a node when it is being added to a diskset - * so that the resync threads are started on the newly added node. - */ - if ((!(MD_MNSET_SETNO(setno))) || - (ri->ri_flags & MD_RI_RESYNC_FORCE_MNSTART)) { - return (mirror_resync_unit(mnum, ri, &ri->mde, lock)); - } else { - return (0); - } -} - -int -mirror_mark_resync_region_non_owner(struct mm_unit *un, - diskaddr_t startblk, diskaddr_t endblk, md_mn_nodeid_t source_node) -{ - int no_change; - size_t start_rr; - size_t current_rr; - size_t end_rr; - md_mn_msg_rr_dirty_t *rr; - md_mn_kresult_t *kres; - set_t setno = MD_UN2SET(un); - int rval; - md_mn_nodeid_t node_idx = source_node - 1; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - md_mn_nodeid_t owner_node; - minor_t mnum = MD_SID(un); - - if (un->un_nsm < 2) - return (0); - - /* - * Check to see if we have a un_pernode_dirty_bm[] entry allocated. If - * not, allocate it and then fill the [start..end] entries. - * Update un_pernode_dirty_sum if we've gone 0->1. - * Update un_dirty_bm if the corresponding entries are clear. - */ - rw_enter(&un->un_pernode_dirty_mx[node_idx], RW_WRITER); - if (un->un_pernode_dirty_bm[node_idx] == NULL) { - un->un_pernode_dirty_bm[node_idx] = - (uchar_t *)kmem_zalloc( - (uint_t)howmany(un->un_rrd_num, NBBY), KM_SLEEP); - } - rw_exit(&un->un_pernode_dirty_mx[node_idx]); - - BLK_TO_RR(end_rr, endblk, un); - BLK_TO_RR(start_rr, startblk, un); - - no_change = 1; - - mutex_enter(&un->un_resync_mx); - rw_enter(&un->un_pernode_dirty_mx[node_idx], RW_READER); - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) { - un->un_outstanding_writes[current_rr]++; - if (!IS_PERNODE_DIRTY(source_node, current_rr, un)) { - un->un_pernode_dirty_sum[current_rr]++; - SET_PERNODE_DIRTY(source_node, current_rr, un); - } - CLR_GOING_CLEAN(current_rr, un); - if (!IS_REGION_DIRTY(current_rr, un)) { - no_change = 0; - SET_REGION_DIRTY(current_rr, un); - SET_GOING_DIRTY(current_rr, un); - } else if (IS_GOING_DIRTY(current_rr, un)) - no_change = 0; - } - rw_exit(&un->un_pernode_dirty_mx[node_idx]); - mutex_exit(&un->un_resync_mx); - - if (no_change) { - return (0); - } - - /* - * If we have dirty regions to commit, send a - * message to the owning node so that the - * in-core bitmap gets updated appropriately. - * TODO: make this a kmem_cache pool to improve - * alloc/free performance ??? - */ - kres = (md_mn_kresult_t *)kmem_alloc(sizeof (md_mn_kresult_t), - KM_SLEEP); - rr = (md_mn_msg_rr_dirty_t *)kmem_alloc(sizeof (md_mn_msg_rr_dirty_t), - KM_SLEEP); - -resend_mmrr: - owner_node = un->un_mirror_owner; - - rr->rr_mnum = mnum; - rr->rr_nodeid = md_mn_mynode_id; - rr->rr_range = (ushort_t)start_rr << 16; - rr->rr_range |= (ushort_t)end_rr & 0xFFFF; - - /* release readerlock before sending message */ - md_unit_readerexit(ui); - - rval = mdmn_ksend_message(setno, MD_MN_MSG_RR_DIRTY, - MD_MSGF_NO_LOG|MD_MSGF_BLK_SIGNAL|MD_MSGF_DIRECTED, - un->un_mirror_owner, (char *)rr, - sizeof (md_mn_msg_rr_dirty_t), kres); - - /* reaquire readerlock on message completion */ - (void) md_unit_readerlock(ui); - - /* if the message send failed, note it, and pass an error back up */ - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - /* if commd is gone, no point in printing a message */ - if (md_mn_is_commd_present()) - mdmn_ksend_show_error(rval, kres, "RR_DIRTY"); - kmem_free(kres, sizeof (md_mn_kresult_t)); - kmem_free(rr, sizeof (md_mn_msg_rr_dirty_t)); - return (1); - } - - /* - * if the owner changed while we were sending the message, and it's - * not us, the new mirror owner won't yet have done the right thing - * with our data. Let him know. If we became the owner, we'll - * deal with that differently below. Note that receiving a message - * about another node twice won't hurt anything. - */ - if (un->un_mirror_owner != owner_node && !MD_MN_MIRROR_OWNER(un)) - goto resend_mmrr; - - kmem_free(kres, sizeof (md_mn_kresult_t)); - kmem_free(rr, sizeof (md_mn_msg_rr_dirty_t)); - - mutex_enter(&un->un_resync_mx); - - /* - * If we became the owner changed while we were sending the message, - * we have dirty bits in the un_pernode_bm that aren't yet reflected - * in the un_dirty_bm, as it was re-read from disk, and our bits - * are also not reflected in the on-disk DRL. Fix that now. - */ - if (MD_MN_MIRROR_OWNER(un)) { - rw_enter(&un->un_pernode_dirty_mx[node_idx], RW_WRITER); - mirror_copy_rr(howmany(un->un_rrd_num, NBBY), - un->un_pernode_dirty_bm[node_idx], un->un_dirty_bm); - rw_exit(&un->un_pernode_dirty_mx[node_idx]); - - un->un_resync_flg |= MM_RF_COMMITING | MM_RF_GATECLOSED; - - mutex_exit(&un->un_resync_mx); - mddb_commitrec_wrapper(un->un_rr_dirty_recid); - mutex_enter(&un->un_resync_mx); - - un->un_resync_flg &= ~(MM_RF_COMMITING | MM_RF_GATECLOSED); - cv_broadcast(&un->un_resync_cv); - } - - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) - CLR_GOING_DIRTY(current_rr, un); - - mutex_exit(&un->un_resync_mx); - - return (0); -} - -int -mirror_mark_resync_region_owner(struct mm_unit *un, - diskaddr_t startblk, diskaddr_t endblk, md_mn_nodeid_t source_node) -{ - int no_change; - size_t start_rr; - size_t current_rr; - size_t end_rr; - int mnset = MD_MNSET_SETNO(MD_UN2SET(un)); - md_mn_nodeid_t node_idx = source_node - 1; - - if (un->un_nsm < 2) - return (0); - - /* - * Check to see if we have a un_pernode_dirty_bm[] entry allocated. If - * not, allocate it and then fill the [start..end] entries. - * Update un_pernode_dirty_sum if we've gone 0->1. - * Update un_dirty_bm if the corresponding entries are clear. - */ - if (mnset) { - rw_enter(&un->un_pernode_dirty_mx[node_idx], RW_WRITER); - if (un->un_pernode_dirty_bm[node_idx] == NULL) { - un->un_pernode_dirty_bm[node_idx] = - (uchar_t *)kmem_zalloc( - (uint_t)howmany(un->un_rrd_num, NBBY), KM_SLEEP); - } - rw_exit(&un->un_pernode_dirty_mx[node_idx]); - } - - mutex_enter(&un->un_resync_mx); - - if (mnset) - rw_enter(&un->un_pernode_dirty_mx[node_idx], RW_READER); - - no_change = 1; - BLK_TO_RR(end_rr, endblk, un); - BLK_TO_RR(start_rr, startblk, un); - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) { - if (!mnset || source_node == md_mn_mynode_id) - un->un_outstanding_writes[current_rr]++; - if (mnset) { - if (!IS_PERNODE_DIRTY(source_node, current_rr, un)) - un->un_pernode_dirty_sum[current_rr]++; - SET_PERNODE_DIRTY(source_node, current_rr, un); - } - CLR_GOING_CLEAN(current_rr, un); - if (!IS_REGION_DIRTY(current_rr, un)) - no_change = 0; - if (IS_GOING_DIRTY(current_rr, un)) - no_change = 0; - } - - if (mnset) - rw_exit(&un->un_pernode_dirty_mx[node_idx]); - - if (no_change) { - mutex_exit(&un->un_resync_mx); - return (0); - } - un->un_waiting_to_mark++; - while (un->un_resync_flg & MM_RF_GATECLOSED) { - if (panicstr) - return (1); - cv_wait(&un->un_resync_cv, &un->un_resync_mx); - } - un->un_waiting_to_mark--; - - no_change = 1; - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) { - if (!IS_REGION_DIRTY(current_rr, un)) { - SET_REGION_DIRTY(current_rr, un); - SET_GOING_DIRTY(current_rr, un); - no_change = 0; - } else { - if (IS_GOING_DIRTY(current_rr, un)) - no_change = 0; - } - } - if (no_change) { - if (un->un_waiting_to_mark == 0 || un->un_waiting_to_clear != 0) - cv_broadcast(&un->un_resync_cv); - mutex_exit(&un->un_resync_mx); - return (0); - } - - un->un_resync_flg |= MM_RF_COMMIT_NEEDED; - un->un_waiting_to_commit++; - while (un->un_waiting_to_mark != 0 && - !(un->un_resync_flg & MM_RF_GATECLOSED)) { - if (panicstr) - return (1); - cv_wait(&un->un_resync_cv, &un->un_resync_mx); - } - - if (un->un_resync_flg & MM_RF_COMMIT_NEEDED) { - un->un_resync_flg |= MM_RF_COMMITING | MM_RF_GATECLOSED; - un->un_resync_flg &= ~MM_RF_COMMIT_NEEDED; - - mutex_exit(&un->un_resync_mx); - mddb_commitrec_wrapper(un->un_rr_dirty_recid); - mutex_enter(&un->un_resync_mx); - - un->un_resync_flg &= ~MM_RF_COMMITING; - cv_broadcast(&un->un_resync_cv); - } - while (un->un_resync_flg & MM_RF_COMMITING) { - if (panicstr) - return (1); - cv_wait(&un->un_resync_cv, &un->un_resync_mx); - } - - for (current_rr = start_rr; current_rr <= end_rr; current_rr++) - CLR_GOING_DIRTY(current_rr, un); - - if (--un->un_waiting_to_commit == 0) { - un->un_resync_flg &= ~MM_RF_GATECLOSED; - cv_broadcast(&un->un_resync_cv); - } - mutex_exit(&un->un_resync_mx); - - return (0); -} - -int -mirror_mark_resync_region(struct mm_unit *un, - diskaddr_t startblk, diskaddr_t endblk, md_mn_nodeid_t source_node) -{ - int mnset = MD_MNSET_SETNO(MD_UN2SET(un)); - - if (mnset && !MD_MN_MIRROR_OWNER(un)) { - return (mirror_mark_resync_region_non_owner(un, startblk, - endblk, source_node)); - } else { - return (mirror_mark_resync_region_owner(un, startblk, endblk, - source_node)); - } -} - -int -mirror_resize_resync_regions(mm_unit_t *un, diskaddr_t new_tb) -{ - short *owp; - optim_resync_t *orp; - uint_t rr_mult = 1; - uint_t old_nregions, new_nregions; - int old_bm_size, new_bm_size; - size_t size; - mddb_recid_t recid, old_recid; - uchar_t *old_dirty_bm; - int i, j; - mddb_type_t typ1; - set_t setno = MD_UN2SET(un); - uchar_t *old_pns; - - old_nregions = un->un_rrd_num; - new_nregions = (uint_t)((new_tb/un->un_rrd_blksize) + 1); - - while (new_nregions > MD_MAX_NUM_RR) { - new_nregions >>= 1; - rr_mult <<= 1; - } - - new_bm_size = howmany(new_nregions, NBBY); - old_bm_size = howmany(old_nregions, NBBY); - - size = new_bm_size + sizeof (*orp) - sizeof (orp->or_rr); - - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - recid = mddb_createrec(size, typ1, RESYNC_REC, - MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno); - if (recid < 0) - return (-1); - - orp = (struct optim_resync *)mddb_getrecaddr(recid); - ASSERT(orp != NULL); - - orp->or_magic = OR_MAGIC; /* Magic # */ - orp->or_blksize = un->un_rrd_blksize; /* Same block size */ - orp->or_num = new_nregions; /* New number of regions */ - - old_dirty_bm = un->un_dirty_bm; - un->un_dirty_bm = orp->or_rr; - - kmem_free((caddr_t)un->un_goingdirty_bm, old_bm_size); - un->un_goingdirty_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - - kmem_free((caddr_t)un->un_goingclean_bm, old_bm_size); - un->un_goingclean_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - - kmem_free((caddr_t)un->un_resync_bm, old_bm_size); - un->un_resync_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - - owp = un->un_outstanding_writes; - un->un_outstanding_writes = (short *)kmem_zalloc( - new_nregions * sizeof (short), KM_SLEEP); - - old_pns = un->un_pernode_dirty_sum; - if (old_pns) - un->un_pernode_dirty_sum = (uchar_t *)kmem_zalloc(new_nregions, - KM_SLEEP); - - /* - * Now translate the old records into the new - * records - */ - for (i = 0; i < old_nregions; i++) { - /* - * only bring forward the - * outstanding write counters and the dirty bits and also - * the pernode_summary counts - */ - if (!isset(old_dirty_bm, i)) - continue; - - setbit(un->un_dirty_bm, (i / rr_mult)); - un->un_outstanding_writes[(i / rr_mult)] += owp[i]; - if (old_pns) - un->un_pernode_dirty_sum[(i / rr_mult)] += old_pns[i]; - } - kmem_free((caddr_t)owp, old_nregions * sizeof (short)); - if (old_pns) - kmem_free((caddr_t)old_pns, old_nregions); - - /* - * Copy all non-zero un_pernode_dirty_bm[] arrays to new versions - */ - for (j = 0; j < MD_MNMAXSIDES; j++) { - rw_enter(&un->un_pernode_dirty_mx[j], RW_WRITER); - old_dirty_bm = un->un_pernode_dirty_bm[j]; - if (old_dirty_bm) { - un->un_pernode_dirty_bm[j] = (uchar_t *)kmem_zalloc( - new_bm_size, KM_SLEEP); - for (i = 0; i < old_nregions; i++) { - if (!isset(old_dirty_bm, i)) - continue; - - setbit(un->un_pernode_dirty_bm[j], - (i / rr_mult)); - } - kmem_free((caddr_t)old_dirty_bm, old_bm_size); - } - rw_exit(&un->un_pernode_dirty_mx[j]); - } - - /* Save the old record id */ - old_recid = un->un_rr_dirty_recid; - - /* Update the mirror unit struct */ - un->un_rr_dirty_recid = recid; - un->un_rrd_num = new_nregions; - un->un_rrd_blksize = un->un_rrd_blksize * rr_mult; - - orp->or_blksize = un->un_rrd_blksize; - - /* - * NOTE: The reason there are distinct calls to mddb_commitrec_wrapper - * instead of using mddb_commitrecs_wrapper, is that you cannot - * atomically commit optimized records. - */ - mddb_commitrec_wrapper(recid); - mddb_commitrec_wrapper(un->c.un_record_id); - mddb_deleterec_wrapper(old_recid); - return (0); -} - -/* lockp can be NULL for !MN diksets */ -int -mirror_add_resync_regions(mm_unit_t *un, diskaddr_t new_tb) -{ - uchar_t *old; - short *owp; - optim_resync_t *orp; - uint_t old_nregions, new_nregions; - int old_bm_size, new_bm_size; - size_t size; - mddb_recid_t recid, old_recid; - mddb_type_t typ1; - set_t setno = MD_UN2SET(un); - int i; - - old_nregions = un->un_rrd_num; - new_nregions = (uint_t)((new_tb/un->un_rrd_blksize) + 1); - - new_bm_size = howmany(new_nregions, NBBY); - old_bm_size = howmany(old_nregions, NBBY); - - size = new_bm_size + sizeof (*orp) - sizeof (orp->or_rr); - - typ1 = (mddb_type_t)md_getshared_key(setno, - mirror_md_ops.md_driver.md_drivername); - - recid = mddb_createrec(size, typ1, RESYNC_REC, - MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno); - if (recid < 0) - return (-1); - - orp = (struct optim_resync *)mddb_getrecaddr(recid); - ASSERT(orp != NULL); - - orp->or_magic = OR_MAGIC; /* Magic # */ - orp->or_blksize = un->un_rrd_blksize; /* Same block size */ - orp->or_num = new_nregions; /* New number of regions */ - - /* Copy the old bm over the new bm */ - bcopy((caddr_t)un->un_dirty_bm, (caddr_t)orp->or_rr, old_bm_size); - - /* - * Create new bigger incore arrays, copy, and free old ones: - * un_goingdirty_bm - * un_goingclean_bm - * un_resync_bm - * un_outstanding_writes - * un_pernode_dirty_sum - * un_pernode_dirty_bm[] - */ - old = un->un_goingdirty_bm; - un->un_goingdirty_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - bcopy((caddr_t)old, (caddr_t)un->un_goingdirty_bm, old_bm_size); - kmem_free((caddr_t)old, old_bm_size); - - old = un->un_goingclean_bm; - un->un_goingclean_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - bcopy((caddr_t)old, (caddr_t)un->un_goingclean_bm, old_bm_size); - kmem_free((caddr_t)old, old_bm_size); - - old = un->un_resync_bm; - un->un_resync_bm = (uchar_t *)kmem_zalloc(new_bm_size, KM_SLEEP); - bcopy((caddr_t)old, (caddr_t)un->un_resync_bm, old_bm_size); - kmem_free((caddr_t)old, old_bm_size); - - owp = un->un_outstanding_writes; - un->un_outstanding_writes = (short *)kmem_zalloc( - (uint_t)new_nregions * sizeof (short), KM_SLEEP); - bcopy((caddr_t)owp, (caddr_t)un->un_outstanding_writes, - old_nregions * sizeof (short)); - kmem_free((caddr_t)owp, (old_nregions * sizeof (short))); - - old = un->un_pernode_dirty_sum; - if (old) { - un->un_pernode_dirty_sum = (uchar_t *)kmem_zalloc( - new_nregions, KM_SLEEP); - bcopy((caddr_t)old, (caddr_t)un->un_pernode_dirty_sum, - old_nregions); - kmem_free((caddr_t)old, old_nregions); - } - - for (i = 0; i < MD_MNMAXSIDES; i++) { - rw_enter(&un->un_pernode_dirty_mx[i], RW_WRITER); - old = un->un_pernode_dirty_bm[i]; - if (old) { - un->un_pernode_dirty_bm[i] = (uchar_t *)kmem_zalloc( - new_bm_size, KM_SLEEP); - bcopy((caddr_t)old, (caddr_t)un->un_pernode_dirty_bm[i], - old_bm_size); - kmem_free((caddr_t)old, old_bm_size); - } - rw_exit(&un->un_pernode_dirty_mx[i]); - } - - /* Save the old record id */ - old_recid = un->un_rr_dirty_recid; - - /* Update the mirror unit struct */ - un->un_rr_dirty_recid = recid; - un->un_rrd_num = new_nregions; - un->un_dirty_bm = orp->or_rr; - - /* - * NOTE: The reason there are distinct calls to mddb_commitrec_wrapper - * instead of using mddb_commitrecs_wrapper, is that you cannot - * atomically commit optimized records. - */ - mddb_commitrec_wrapper(recid); - mddb_commitrec_wrapper(un->c.un_record_id); - mddb_deleterec_wrapper(old_recid); - return (0); -} - -/* - * mirror_copy_rr: - * -------------- - * Combine the dirty record bitmap with the in-core resync bitmap. This allows - * us to carry a resync over an ownership change. - */ -void -mirror_copy_rr(int sz, uchar_t *src, uchar_t *dest) -{ - int i; - - for (i = 0; i < sz; i++) - *dest++ |= *src++; -} - -/* - * mirror_set_dirty_rr: - * ------------------- - * Set the pernode_dirty_bm[node] entries and un_dirty_bm[] if appropriate. - * For the owning node (DRL/mirror owner) update the on-disk RR if needed. - * Called on every clean->dirty transition for the originating writer node. - * Note: only the non-owning nodes will initiate this message and it is only - * the owning node that has to process it. - */ -int -mirror_set_dirty_rr(md_mn_rr_dirty_params_t *iocp) -{ - - minor_t mnum = iocp->rr_mnum; - mm_unit_t *un; - int start = (int)iocp->rr_start; - int end = (int)iocp->rr_end; - set_t setno = MD_MIN2SET(mnum); - md_mn_nodeid_t orignode = iocp->rr_nodeid; /* 1-based */ - diskaddr_t startblk, endblk; - - mdclrerror(&iocp->mde); - - if ((setno >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits)) { - return (mdmderror(&iocp->mde, MDE_INVAL_UNIT, mnum)); - } - - /* Must have _NO_ ioctl lock set if we update the RR on-disk */ - un = mirror_getun(mnum, &iocp->mde, NO_LOCK, NULL); - - if (un == NULL) { - return (mdmderror(&iocp->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - if (un->c.un_type != MD_METAMIRROR) { - return (mdmderror(&iocp->mde, MDE_NOT_MM, mnum)); - } - if (orignode < 1 || orignode >= MD_MNMAXSIDES) { - return (mdmderror(&iocp->mde, MDE_INVAL_UNIT, mnum)); - } - if (un->un_nsm < 2) { - return (0); - } - - /* - * Only process this message if we're the owner of the mirror. - */ - if (!MD_MN_MIRROR_OWNER(un)) { - return (0); - } - - RR_TO_BLK(startblk, start, un); - RR_TO_BLK(endblk, end, un); - return (mirror_mark_resync_region_owner(un, startblk, endblk, - orignode)); -} - -/* - * mirror_clean_rr_bits: - * -------------------- - * Clear the pernode_dirty_bm[node] entries which are passed in the bitmap - * Once _all_ references are removed (pernode_dirty_count[x] == 0) this region - * is 'cleanable' and will get flushed out by clearing un_dirty_bm[] on all - * nodes. Callable from ioctl / interrupt / whatever context. - * un_resync_mx is held on entry. - */ -static void -mirror_clean_rr_bits( - md_mn_rr_clean_params_t *iocp) -{ - minor_t mnum = iocp->rr_mnum; - mm_unit_t *un; - uint_t cleared_bits; - md_mn_nodeid_t node = iocp->rr_nodeid - 1; - md_mn_nodeid_t orignode = iocp->rr_nodeid; - int i, start, end; - - un = mirror_getun(mnum, &iocp->mde, NO_LOCK, NULL); - - cleared_bits = 0; - start = MDMN_RR_CLEAN_PARAMS_START_BIT(iocp); - end = start + MDMN_RR_CLEAN_PARAMS_DATA_BYTES(iocp) * NBBY; - rw_enter(&un->un_pernode_dirty_mx[node], RW_READER); - for (i = start; i < end; i++) { - if (isset(MDMN_RR_CLEAN_PARAMS_DATA(iocp), i - start)) { - if (IS_PERNODE_DIRTY(orignode, i, un)) { - un->un_pernode_dirty_sum[i]--; - CLR_PERNODE_DIRTY(orignode, i, un); - } - if (un->un_pernode_dirty_sum[i] == 0) { - cleared_bits++; - CLR_REGION_DIRTY(i, un); - CLR_GOING_CLEAN(i, un); - } - } - } - rw_exit(&un->un_pernode_dirty_mx[node]); - if (cleared_bits) { - /* - * We can only be called iff we are the mirror owner, however - * as this is a (potentially) decoupled routine the ownership - * may have moved from us by the time we get to execute the - * bit clearing. Hence we still need to check for being the - * owner before flushing the DRL to the replica. - */ - if (MD_MN_MIRROR_OWNER(un)) { - mutex_exit(&un->un_resync_mx); - mddb_commitrec_wrapper(un->un_rr_dirty_recid); - mutex_enter(&un->un_resync_mx); - } - } -} - -/* - * mirror_drl_task: - * --------------- - * Service routine for clearing the DRL bits on a deferred MD_MN_RR_CLEAN call - * We need to obtain exclusive access to the un_resync_cv and then clear the - * necessary bits. - * On completion, we must also free the passed in argument as it is allocated - * at the end of the ioctl handler and won't be freed on completion. - */ -static void -mirror_drl_task(void *arg) -{ - md_mn_rr_clean_params_t *iocp = (md_mn_rr_clean_params_t *)arg; - minor_t mnum = iocp->rr_mnum; - mm_unit_t *un; - - un = mirror_getun(mnum, &iocp->mde, NO_LOCK, NULL); - - mutex_enter(&un->un_rrp_inflight_mx); - mutex_enter(&un->un_resync_mx); - un->un_waiting_to_clear++; - while (un->un_resync_flg & MM_RF_STALL_CLEAN) - cv_wait(&un->un_resync_cv, &un->un_resync_mx); - un->un_waiting_to_clear--; - - un->un_resync_flg |= MM_RF_GATECLOSED; - mirror_clean_rr_bits(iocp); - un->un_resync_flg &= ~MM_RF_GATECLOSED; - if (un->un_waiting_to_mark != 0 || un->un_waiting_to_clear != 0) { - cv_broadcast(&un->un_resync_cv); - } - mutex_exit(&un->un_resync_mx); - mutex_exit(&un->un_rrp_inflight_mx); - - kmem_free((caddr_t)iocp, MDMN_RR_CLEAN_PARAMS_SIZE(iocp)); -} - -/* - * mirror_set_clean_rr: - * ------------------- - * Clear the pernode_dirty_bm[node] entries which are passed in the bitmap - * Once _all_ references are removed (pernode_dirty_count[x] == 0) this region - * is 'cleanable' and will get flushed out by clearing un_dirty_bm[] on all - * nodes. - * - * Only the mirror-owner need process this message as it is the only RR updater. - * Non-owner nodes issue this request, but as we have no point-to-point message - * support we will receive the message on all nodes. - */ -int -mirror_set_clean_rr(md_mn_rr_clean_params_t *iocp) -{ - - minor_t mnum = iocp->rr_mnum; - mm_unit_t *un; - set_t setno = MD_MIN2SET(mnum); - md_mn_nodeid_t node = iocp->rr_nodeid - 1; - int can_clear = 0; - md_mn_rr_clean_params_t *newiocp; - int rval = 0; - - mdclrerror(&iocp->mde); - - if ((setno >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits)) { - return (mdmderror(&iocp->mde, MDE_INVAL_UNIT, mnum)); - } - - /* Must have _NO_ ioctl lock set if we update the RR on-disk */ - un = mirror_getun(mnum, &iocp->mde, NO_LOCK, NULL); - - if (un == NULL) { - return (mdmderror(&iocp->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - if (un->c.un_type != MD_METAMIRROR) { - return (mdmderror(&iocp->mde, MDE_NOT_MM, mnum)); - } - if (un->un_nsm < 2) { - return (0); - } - - /* - * Check to see if we're the mirror owner. If not, there's nothing - * for us to to. - */ - if (!MD_MN_MIRROR_OWNER(un)) { - return (0); - } - - /* - * Process the to-be-cleaned bitmap. We need to update the pernode_dirty - * bits and pernode_dirty_sum[n], and if, and only if, the sum goes 0 - * we can then mark the un_dirty_bm entry as GOINGCLEAN. Alternatively - * we can just defer this cleaning until the next process_resync_regions - * timeout. - */ - rw_enter(&un->un_pernode_dirty_mx[node], RW_WRITER); - if (un->un_pernode_dirty_bm[node] == NULL) { - un->un_pernode_dirty_bm[node] = (uchar_t *)kmem_zalloc( - howmany(un->un_rrd_num, NBBY), KM_SLEEP); - } - rw_exit(&un->un_pernode_dirty_mx[node]); - - /* - * See if we can simply clear the un_dirty_bm[] entries. If we're not - * the issuing node _and_ we aren't in the process of marking/clearing - * the RR bitmaps, we can simply update the bits as needed. - * If we're the owning node and _not_ the issuing node, we should also - * sync the RR if we clear any bits in it. - */ - mutex_enter(&un->un_resync_mx); - can_clear = (un->un_resync_flg & MM_RF_STALL_CLEAN) ? 0 : 1; - if (can_clear) { - un->un_resync_flg |= MM_RF_GATECLOSED; - mirror_clean_rr_bits(iocp); - un->un_resync_flg &= ~MM_RF_GATECLOSED; - if (un->un_waiting_to_mark != 0 || - un->un_waiting_to_clear != 0) { - cv_broadcast(&un->un_resync_cv); - } - } - mutex_exit(&un->un_resync_mx); - - /* - * If we couldn't clear the bits, due to DRL update from m_m_r_r / p_r_r - * we must schedule a blocking call to update the DRL on this node. - * As we're invoked from an ioctl we are going to have the original data - * disappear (kmem_free) once we return. So, copy the data into a new - * structure and let the taskq routine release it on completion. - */ - if (!can_clear) { - size_t sz = MDMN_RR_CLEAN_PARAMS_SIZE(iocp); - - newiocp = (md_mn_rr_clean_params_t *)kmem_alloc(sz, KM_SLEEP); - - bcopy(iocp, newiocp, sz); - - if (ddi_taskq_dispatch(un->un_drl_task, mirror_drl_task, - newiocp, DDI_NOSLEEP) != DDI_SUCCESS) { - kmem_free(newiocp, sz); - rval = ENOMEM; /* probably starvation */ - } - } - - return (rval); -} diff --git a/usr/src/uts/common/io/lvm/notify/md_notify.c b/usr/src/uts/common/io/lvm/notify/md_notify.c deleted file mode 100644 index 693793f285fe..000000000000 --- a/usr/src/uts/common/io/lvm/notify/md_notify.c +++ /dev/null @@ -1,669 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -md_ops_t event_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &event_md_ops; -#endif - -extern void sigintr(); -extern void sigunintr(); -extern md_set_t md_set[]; - -extern kmutex_t md_mx; /* used to md global stuff */ -extern kcondvar_t md_cv; /* md_status events */ -extern int md_status; -extern clock_t md_hz; -extern md_event_queue_t *md_event_queue; -static void md_reaper(); -extern void md_clear_named_service(); - -/* event handler stuff */ -kmutex_t md_eventq_mx; -int md_reap_count = 32; /* check for pid alive */ -int md_reap = 0; -int md_max_notify_queue = 512; -int md_reap_off = 0; /* non-zero turns off reap */ -/* don't allow module to be unloaded until all pending ops are complete */ -int global_lock_wait_cnt = 0; - -static int -md_flush_queue(md_event_queue_t *queue) -{ - md_event_t *element, *next_element; - /* - * if there is something waiting on it and the - * process/pid no longer exist then signal the defunct - * process continue on to clean this up later. - */ - if (queue->mdn_waiting) - return (1); - /* - * this pid no longer exists blow it away - * first remove any entries, then unlink it and lastly - * free it. - */ - element = queue->mdn_front; - while (element) { - next_element = element->mdn_next; - kmem_free(element, sizeof (md_event_t)); - element = next_element; - } - queue->mdn_front = queue->mdn_tail = NULL; - return (0); - -} - -static void -md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event, - u_longlong_t user) -{ - - md_event_queue_t *queue; - md_event_t *entry; - - if (!md_event_queue) - return; - - mutex_enter(&md_eventq_mx); - for (queue = md_event_queue; queue; queue = queue->mdn_nextq) { - if (queue->mdn_size >= md_max_notify_queue) { - ASSERT(queue->mdn_front != NULL); - ASSERT(queue->mdn_front->mdn_next != NULL); - entry = queue->mdn_front; - queue->mdn_front = entry->mdn_next; - queue->mdn_size--; - queue->mdn_flags |= MD_EVENT_QUEUE_FULL; - } else - entry = (md_event_t *)kmem_alloc(sizeof (md_event_t), - KM_NOSLEEP); - if (entry == NULL) { - queue->mdn_flags |= MD_EVENT_QUEUE_INVALID; - continue; - } - entry->mdn_tag = tag; - entry->mdn_set = sp; - entry->mdn_dev = dev; - entry->mdn_event = event; - entry->mdn_user = user; - entry->mdn_next = NULL; - uniqtime(&entry->mdn_time); - if (queue->mdn_front == NULL) { - queue->mdn_front = entry; - queue->mdn_tail = entry; - } else { - queue->mdn_tail->mdn_next = entry; - queue->mdn_tail = entry; - } - if (queue->mdn_waiting) - cv_signal(&queue->mdn_cv); - - queue->mdn_size++; - } - md_reap++; - mutex_exit(&md_eventq_mx); - - if (md_reap > md_reap_count) - md_reaper(); -} - -static void -md_reaper() -{ - md_event_queue_t *next = md_event_queue; - md_event_queue_t *present, *last = NULL; - - if (md_event_queue == NULL || md_reap_off) - return; - - mutex_enter(&md_eventq_mx); - while (next) { - present = next; - next = present->mdn_nextq; - - /* check for long term event queue */ - if (present->mdn_flags & MD_EVENT_QUEUE_PERM) { - last = present; - continue; - } - - /* check to see if the pid is still alive */ - if (!md_checkpid(present->mdn_pid, present->mdn_proc)) - present->mdn_flags |= MD_EVENT_QUEUE_DESTROY; - - /* see if queue is a "marked queue" if so destroy */ - if (! (present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) { - last = present; - continue; - } - - /* yeeeha blow this one away */ - present->mdn_pid = 0; - present->mdn_proc = NULL; - /* - * if there is something waiting on it and the - * process/pid no longer exist then signal the defunct - * process continue on to clean this up later. - */ - if (md_flush_queue(present)) { - present->mdn_flags = MD_EVENT_QUEUE_DESTROY; - cv_broadcast(&present->mdn_cv); - last = present; - continue; - } - /* remove the entry */ - if (last == NULL) - md_event_queue = next; - else - last->mdn_nextq = next; - cv_destroy(&present->mdn_cv); - kmem_free(present, sizeof (md_event_queue_t)); - } - md_reap = 0; - mutex_exit(&md_eventq_mx); -} - -/* ARGSUSED */ -static int -notify_halt(md_haltcmd_t cmd, set_t setno) -{ - md_event_queue_t *orig_queue, *queue, *queue_free; - int i; - - - switch (cmd) { - case MD_HALT_CLOSE: - case MD_HALT_OPEN: - case MD_HALT_DOIT: - case MD_HALT_CHECK: - - return (0); - - case MD_HALT_UNLOAD: - if (setno != MD_LOCAL_SET) - return (1); - mutex_enter(&md_eventq_mx); - if (md_event_queue == NULL) { - mutex_exit(&md_eventq_mx); - return (0); - } - - orig_queue = md_event_queue; - md_event_queue = NULL; - for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) { - for (queue = orig_queue; queue; - queue = queue->mdn_nextq) { - if (queue->mdn_waiting == 0) { - continue; - } - queue->mdn_flags = MD_EVENT_QUEUE_DESTROY; - mutex_exit(&md_eventq_mx); - cv_broadcast(&queue->mdn_cv); - delay(md_hz); - mutex_enter(&md_eventq_mx); - } - } - for (queue = orig_queue; queue; ) { - if (md_flush_queue(queue)) { - cmn_err(CE_WARN, "md: queue not freed"); - mutex_exit(&md_eventq_mx); - return (1); - } - queue_free = queue; - queue = queue->mdn_nextq; - kmem_free(queue_free, sizeof (md_event_queue_t)); - } - md_event_queue = NULL; - mutex_exit(&md_eventq_mx); - return (0); - - default: - return (1); - } -} - -static md_event_queue_t * -md_find_event_queue(char *q_name, int lock) -{ - md_event_queue_t *event_q = md_event_queue; - - if (lock) - mutex_enter(&md_eventq_mx); - ASSERT(MUTEX_HELD(&md_eventq_mx)); - while (event_q) { - if ((*event_q->mdn_name != *q_name) || - (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) { - event_q = event_q->mdn_nextq; - continue; - } - - if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0) - break; - event_q = event_q->mdn_nextq; - } - if (lock) - mutex_exit(&md_eventq_mx); - - return ((md_event_queue_t *)event_q); -} - -static intptr_t -notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev, - md_event_type_t event) -{ - switch (cmd) { - case EQ_PUT: - md_put_event(tag, set, dev, event, (u_longlong_t)0); - break; - default: - return (-1); - } - return (0); -} - -static int -notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz, - int mode) -{ - - int err; - md_event_ioctl_t *ioctl = (md_event_ioctl_t *)data; - - - ioctl->mdn_event = EQ_EMPTY; - ioctl->mdn_tag = TAG_EMPTY; - ioctl->mdn_set = MD_ALLSETS; - ioctl->mdn_dev = MD_ALLDEVS; - uniqtime32(&ioctl->mdn_time); - ioctl->mdn_user = (u_longlong_t)0; - err = ddi_copyout(data, ioctl_in, sz, mode); - return (err); -} - -/* - * md_wait_for_event: - * IOLOCK_RETURN which drops the md_ioctl_lock is called in this - * routine to enable other mdioctls to enter the kernel while this - * thread of execution waits on an event. When that event occurs, the - * stopped thread wakes and continues and md_ioctl_lock must be - * reacquired. Even though md_ioctl_lock is interruptable, we choose - * to ignore EINTR. Returning w/o acquiring md_ioctl_lock is - * catastrophic since it breaks down ioctl single threading. - * - * Return: 0 md_eventq_mx held - * EINTR md_eventq_mx no held - * Always returns with IOCTL lock held - */ - -static int -md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in, - md_event_ioctl_t *ioctl, size_t sz, - int mode, IOLOCK *lockp) -{ - int rval = 0; - - while (event_queue->mdn_front == NULL) { - event_queue->mdn_waiting++; - (void) IOLOCK_RETURN(0, lockp); - rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx); - event_queue->mdn_waiting--; - if ((rval == 0) || (event_queue->mdn_flags & - MD_EVENT_QUEUE_DESTROY)) { - global_lock_wait_cnt++; - mutex_exit(&md_eventq_mx); - /* reenable single threading of ioctls */ - while (md_ioctl_lock_enter() == EINTR); - - (void) notify_fillin_empty_ioctl - ((void *)ioctl, ioctl_in, sz, mode); - mutex_enter(&md_eventq_mx); - global_lock_wait_cnt--; - mutex_exit(&md_eventq_mx); - return (EINTR); - } - /* - * reacquire single threading ioctls. Drop eventq_mutex - * since md_ioctl_lock_enter can sleep. - */ - global_lock_wait_cnt++; - mutex_exit(&md_eventq_mx); - while (md_ioctl_lock_enter() == EINTR); - mutex_enter(&md_eventq_mx); - global_lock_wait_cnt--; - } - return (0); -} - -/* ARGSUSED */ -static int -notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp) -{ - int cmd; - pid_t pid; - md_event_queue_t *event_queue; - md_event_t *event; - cred_t *credp; - char *q_name; - int err = 0; - size_t sz = 0; - md_event_ioctl_t *ioctl; - - sz = sizeof (*ioctl); - ioctl = kmem_zalloc(sz, KM_SLEEP); - - if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) { - err = EFAULT; - goto out; - } - - if (ioctl->mdn_rev != MD_NOTIFY_REVISION) { - err = EINVAL; - goto out; - } - if (ioctl->mdn_magic != MD_EVENT_ID) { - err = EINVAL; - goto out; - } - - pid = md_getpid(); - cmd = ioctl->mdn_cmd; - q_name = ioctl->mdn_name; - - if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count)) - md_reaper(); - - if ((cmd != EQ_ON) && (cmd != EQ_PUT)) { - mutex_enter(&md_eventq_mx); - if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) { - mutex_exit(&md_eventq_mx); - (void) notify_fillin_empty_ioctl - ((void *)ioctl, ioctl_in, sz, mode); - err = ENOENT; - goto out; - } - } - - switch (cmd) { - case EQ_ON: - - md_reaper(); - - mutex_enter(&md_eventq_mx); - if (md_find_event_queue(q_name, 0) != NULL) { - mutex_exit(&md_eventq_mx); - err = EEXIST; - break; - } - - /* allocate and initialize queue head */ - event_queue = (md_event_queue_t *) - kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP); - if (event_queue == NULL) { - mutex_exit(&md_eventq_mx); - err = ENOMEM; - break; - } - - cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL); - - event_queue->mdn_flags = 0; - event_queue->mdn_pid = pid; - event_queue->mdn_proc = md_getproc(); - event_queue->mdn_size = 0; - event_queue->mdn_front = NULL; - event_queue->mdn_tail = NULL; - event_queue->mdn_waiting = 0; - event_queue->mdn_nextq = NULL; - credp = ddi_get_cred(); - event_queue->mdn_uid = crgetuid(credp); - bcopy(q_name, event_queue->mdn_name, - MD_NOTIFY_NAME_SIZE); - if (ioctl->mdn_flags & EQ_Q_PERM) - event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM; - - /* link into the list of event queues */ - if (md_event_queue != NULL) - event_queue->mdn_nextq = md_event_queue; - md_event_queue = event_queue; - mutex_exit(&md_eventq_mx); - err = 0; - break; - - case EQ_OFF: - - if (md_event_queue == NULL) - return (ENOENT); - - event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY; - event_queue->mdn_pid = 0; - event_queue->mdn_proc = NULL; - - if (event_queue->mdn_waiting != 0) - cv_broadcast(&event_queue->mdn_cv); - - /* - * force the reaper to delete this when it has no process - * waiting on it. - */ - mutex_exit(&md_eventq_mx); - md_reaper(); - err = 0; - break; - - case EQ_GET_NOWAIT: - case EQ_GET_WAIT: - if (cmd == EQ_GET_WAIT) { - err = md_wait_for_event(event_queue, ioctl_in, - ioctl, sz, mode, lockp); - if (err == EINTR) - goto out; - } - ASSERT(MUTEX_HELD(&md_eventq_mx)); - if (event_queue->mdn_flags & - (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) { - event_queue->mdn_flags &= - ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL); - mutex_exit(&md_eventq_mx); - err = notify_fillin_empty_ioctl - ((void *)ioctl, ioctl_in, sz, mode); - ioctl->mdn_event = EQ_NOTIFY_LOST; - err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode); - if (err) - err = EFAULT; - goto out; - } - if (event_queue->mdn_front != NULL) { - event = event_queue->mdn_front; - event_queue->mdn_front = event->mdn_next; - event_queue->mdn_size--; - if (event_queue->mdn_front == NULL) - event_queue->mdn_tail = NULL; - mutex_exit(&md_eventq_mx); - ioctl->mdn_tag = event->mdn_tag; - ioctl->mdn_set = event->mdn_set; - ioctl->mdn_dev = event->mdn_dev; - ioctl->mdn_event = event->mdn_event; - ioctl->mdn_user = event->mdn_user; - ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec; - ioctl->mdn_time.tv_usec = - event->mdn_time.tv_usec; - kmem_free(event, sizeof (md_event_t)); - err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode); - if (err) - err = EFAULT; - goto out; - } else { /* no elements on queue */ - mutex_exit(&md_eventq_mx); - err = notify_fillin_empty_ioctl - ((void *)ioctl, ioctl_in, sz, mode); - if (err) - err = EFAULT; - } - - if (cmd == EQ_GET_NOWAIT) - err = EAGAIN; - goto out; - - case EQ_PUT: - - if (!md_event_queue) { - err = ENOENT; - break; - } - md_put_event(ioctl->mdn_tag, - ioctl->mdn_set, ioctl->mdn_dev, - ioctl->mdn_event, ioctl->mdn_user); - err = 0; - goto out; - - default: - err = EINVAL; - goto out; - } - -out: - kmem_free(ioctl, sz); - return (err); -} - -/* - * Turn orphaned queue off for testing purposes. - */ - -static intptr_t -notify_reap_off() -{ - md_reap_off = 1; - return (0); -} - -/* - * Turn reaping back on. - */ - -static intptr_t -notify_reap_on() -{ - md_reap_off = 0; - return (0); -} - -/* - * Return information that is used to test the notification feature. - */ - -static intptr_t -notify_test_stats(md_notify_stats_t *stats) -{ - stats->mds_eventq_mx = &md_eventq_mx; - stats->mds_reap_count = md_reap_count; - stats->mds_reap = md_reap; - stats->mds_max_queue = md_max_notify_queue; - stats->mds_reap_off = md_reap_off; - return (0); -} - -/* - * put this stuff at end so we don't have to create forward - * references for everything - */ -static struct modlmisc modlmisc = { - &mod_miscops, - "Solaris Volume Manager notification module" -}; - -static struct modlinkage modlinkage = { - MODREV_1, (void *)&modlmisc, NULL -}; - -static md_named_services_t notify_services[] = { - {notify_interface, "notify interface"}, - {notify_reap_off, MD_NOTIFY_REAP_OFF}, - {notify_reap_on, MD_NOTIFY_REAP_ON}, - {notify_test_stats, MD_NOTIFY_TEST_STATS}, - {NULL, 0} -}; - -md_ops_t event_md_ops = { - NULL, /* open */ - NULL, /* close */ - NULL, /* strategy */ - NULL, /* print */ - NULL, /* dump */ - NULL, /* read */ - NULL, /* write */ - notify_ioctl, /* event_ioctls, */ - NULL, /* snarf */ - notify_halt, /* halt */ - NULL, /* aread */ - NULL, /* awrite */ - NULL, /* import set */ - notify_services /* named_services */ -}; - -int -_init() -{ - md_event_queue = NULL; - mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL); - return (mod_install(&modlinkage)); -} - -int -_fini() -{ - int err = 0; - - /* - * Don't allow the module to be unloaded while there is a thread - * of execution that is waiting for a global lock. - */ - if (global_lock_wait_cnt > 0) - return (EBUSY); - - if ((err = mod_remove(&modlinkage)) != 0) - return (err); - - md_clear_named_service(); - mutex_destroy(&md_eventq_mx); - return (err); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} diff --git a/usr/src/uts/common/io/lvm/raid/raid.c b/usr/src/uts/common/io/lvm/raid/raid.c deleted file mode 100644 index 940a3e99df53..000000000000 --- a/usr/src/uts/common/io/lvm/raid/raid.c +++ /dev/null @@ -1,4433 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -/* - * NAME: raid.c - * - * DESCRIPTION: Main RAID driver source file containing open, close and I/O - * operations. - * - * ROUTINES PROVIDED FOR EXTERNAL USE: - * raid_open() - open the RAID metadevice for access. - * raid_internal_open() - internal open routine of RAID metdevice. - * md_raid_strategy() - perform normal I/O operations, - * such as read and write. - * raid_close() - close the RAID metadevice. - * raid_internal_close() - internal close routine of RAID metadevice. - * raid_snarf() - initialize and clean up MDD records. - * raid_halt() - reset the RAID metadevice - * raid_line() - return the line # of this segment - * raid_dcolumn() - return the data column # of this segment - * raid_pcolumn() - return the parity column # of this segment - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -md_ops_t raid_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &raid_md_ops; -#endif /* lint */ - -extern unit_t md_nunits; -extern unit_t md_nsets; -extern md_set_t md_set[]; -extern int md_status; -extern major_t md_major; -extern mdq_anchor_t md_done_daemon; -extern mdq_anchor_t md_mstr_daemon; -extern int md_sleep_for_test; -extern clock_t md_hz; - -extern md_event_queue_t *md_event_queue; - - -int pchunks = 16; -int phigh = 1024; -int plow = 128; -int cchunks = 64; -int chigh = 1024; -int clow = 512; -int bchunks = 32; -int bhigh = 256; -int blow = 128; - -int raid_total_io = 0; -int raid_reads = 0; -int raid_writes = 0; -int raid_no_bpmaps = 0; -int raid_512 = 0; -int raid_1024 = 0; -int raid_1024_8192 = 0; -int raid_8192 = 0; -int raid_8192_bigger = 0; -int raid_line_lock_wait = 0; - -int data_buffer_waits = 0; -int parity_buffer_waits = 0; - -/* writer line locks */ -int raid_writer_locks = 0; /* total writer locks */ -int raid_write_waits = 0; /* total writer locks that waited */ -int raid_full_line_writes = 0; /* total full line writes */ -int raid_write_queue_length = 0; /* wait queue length */ -int raid_max_write_q_length = 0; /* maximum queue length */ -int raid_write_locks_active = 0; /* writer locks at any time */ -int raid_max_write_locks = 0; /* maximum writer locks active */ - -/* read line locks */ -int raid_reader_locks = 0; /* total reader locks held */ -int raid_reader_locks_active = 0; /* reader locks held */ -int raid_max_reader_locks = 0; /* maximum reader locks held in run */ -int raid_read_overlaps = 0; /* number of times 2 reads hit same line */ -int raid_read_waits = 0; /* times a reader waited on writer */ - -/* prewrite stats */ -int raid_prewrite_waits = 0; /* number of waits for a pw slot */ -int raid_pw = 0; /* number of pw slots in use */ -int raid_prewrite_max = 0; /* maximum number of pw slots in use */ -int raid_pw_invalidates = 0; - -static clock_t md_wr_wait = 0; - -int nv_available = 0; /* presence of nv-ram support in device */ -int nv_prewrite = 1; /* mark prewrites with nv_available */ -int nv_parity = 1; /* mark parity with nv_available */ - -kmem_cache_t *raid_parent_cache = NULL; -kmem_cache_t *raid_child_cache = NULL; -kmem_cache_t *raid_cbuf_cache = NULL; - -int raid_internal_open(minor_t mnum, int flag, int otyp, - int md_oflags); - -static void freebuffers(md_raidcs_t *cs); -static int raid_read(mr_unit_t *un, md_raidcs_t *cs); -static void raid_read_io(mr_unit_t *un, md_raidcs_t *cs); -static int raid_write(mr_unit_t *un, md_raidcs_t *cs); -static void raid_write_io(mr_unit_t *un, md_raidcs_t *cs); -static void raid_stage(md_raidcs_t *cs); -static void raid_enqueue(md_raidcs_t *cs); -static diskaddr_t raid_line(diskaddr_t segment, mr_unit_t *un); -uint_t raid_dcolumn(diskaddr_t segment, mr_unit_t *un); -static void getpbuffer(md_raidcs_t *cs); -static void getdbuffer(md_raidcs_t *cs); -static void raid_done(buf_t *bp); -static void raid_io_startup(mr_unit_t *un); - -static rus_state_t -raid_col2unit(rcs_state_t state, rus_state_t unitstate) -{ - switch (state) { - case RCS_INIT: - return (RUS_INIT); - case RCS_OKAY: - return (RUS_OKAY); - case RCS_RESYNC: - if (unitstate & RUS_LAST_ERRED) - return (RUS_LAST_ERRED); - else - return (RUS_ERRED); - case RCS_ERRED: - return (RUS_ERRED); - case RCS_LAST_ERRED: - return (RUS_ERRED); - default: - break; - } - panic("raid_col2unit"); - /*NOTREACHED*/ -} - -void -raid_set_state(mr_unit_t *un, int col, rcs_state_t newstate, int force) -{ - - rus_state_t unitstate, origstate; - rcs_state_t colstate; - rcs_state_t orig_colstate; - int errcnt = 0, okaycnt = 0, resynccnt = 0; - int i; - char *devname; - - ASSERT(un); - ASSERT(col < un->un_totalcolumncnt); - ASSERT(newstate & - (RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | - RCS_LAST_ERRED | RCS_REGEN)); - ASSERT((newstate & - ~(RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | - RCS_LAST_ERRED | RCS_REGEN)) - == 0); - - ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1); - - unitstate = un->un_state; - origstate = unitstate; - - if (force) { - un->un_column[col].un_devstate = newstate; - un->un_state = raid_col2unit(newstate, unitstate); - uniqtime32(&un->un_column[col].un_devtimestamp); - uniqtime32(&un->un_timestamp); - return; - } - - ASSERT(un->un_state & - (RUS_INIT | RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | - RUS_REGEN)); - ASSERT((un->un_state & ~(RUS_INIT | - RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | RUS_REGEN)) == 0); - - if (un->un_column[col].un_devstate == newstate) - return; - - if (newstate == RCS_REGEN) { - if (raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) - return; - un->un_state = RUS_REGEN; - return; - } - - orig_colstate = un->un_column[col].un_devstate; - - /* - * if there is another column in the error state then this - * column should go to the last errored state - */ - for (i = 0; i < un->un_totalcolumncnt; i++) { - if (i == col) - colstate = newstate; - else - colstate = un->un_column[i].un_devstate; - if (colstate & (RCS_ERRED | RCS_LAST_ERRED | RCS_INIT_ERRED)) - errcnt++; - if (colstate & RCS_OKAY) - okaycnt++; - if (colstate & RCS_RESYNC) - resynccnt++; - } - ASSERT(resynccnt < 2); - - if (okaycnt == un->un_totalcolumncnt) - unitstate = RUS_OKAY; - else if (errcnt > 1) { - unitstate = RUS_LAST_ERRED; - if (newstate & RCS_ERRED) - newstate = RCS_LAST_ERRED; - } else if (errcnt == 1) - if (!(unitstate & RUS_LAST_ERRED)) - unitstate = RUS_ERRED; - - if (un->un_state == RUS_DOI) - unitstate = RUS_DOI; - - un->un_column[col].un_devstate = newstate; - uniqtime32(&un->un_column[col].un_devtimestamp); - /* - * if there are last errored column being brought back online - * by open or snarf, then be sure to clear the RUS_LAST_ERRED - * bit to allow writes. If there is a real error then the - * column will go back into last erred. - */ - if ((raid_state_cnt(un, RCS_LAST_ERRED) == 0) && - (raid_state_cnt(un, RCS_ERRED) == 1)) - unitstate = RUS_ERRED; - - un->un_state = unitstate; - uniqtime32(&un->un_timestamp); - - if ((! (origstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) && - (unitstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) { - devname = md_devname(MD_UN2SET(un), - un->un_column[col].un_dev, NULL, 0); - - cmn_err(CE_WARN, "md: %s: %s needs maintenance", - md_shortname(MD_SID(un)), devname); - - if (unitstate & RUS_LAST_ERRED) { - cmn_err(CE_WARN, "md: %s: %s last erred", - md_shortname(MD_SID(un)), devname); - - } else if (un->un_column[col].un_devflags & - MD_RAID_DEV_ISOPEN) { - /* - * Close the broken device and clear the open flag on - * it. We have to check that the device is open, - * otherwise the first open on it has resulted in the - * error that is being processed and the actual un_dev - * will be NODEV64. - */ - md_layered_close(un->un_column[col].un_dev, - MD_OFLG_NULL); - un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; - } - } else if (orig_colstate == RCS_LAST_ERRED && newstate == RCS_ERRED && - un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) { - /* - * Similar to logic above except no log messages since we - * are just transitioning from Last Erred to Erred. - */ - md_layered_close(un->un_column[col].un_dev, MD_OFLG_NULL); - un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; - } - - /* - * If a resync has completed, see if there is a Last Erred - * component that we can change to the Erred state. - */ - if ((orig_colstate == RCS_RESYNC) && (newstate == RCS_OKAY)) { - for (i = 0; i < un->un_totalcolumncnt; i++) { - if (i != col && - (un->un_column[i].un_devstate & RCS_LAST_ERRED)) { - raid_set_state(un, i, RCS_ERRED, 0); - break; - } - } - } -} - -/* - * NAME: erred_check_line - * - * DESCRIPTION: Return the type of write to perform on an erred column based - * upon any resync activity. - * - * if a column is being resynced and the write is above the - * resync point may have to write to the target being resynced. - * - * Column state may make it impossible to do the write - * in which case RCL_EIO or RCL_ENXIO is returned. - * - * If a column cannot be written directly, RCL_ERRED is - * returned and processing should proceed accordingly. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * md_raidcs_t *cs - child save structure - * mr_column_t *dcolumn - pointer to data column structure - * mr_column_t *pcolumn - pointer to parity column structure - * - * RETURNS: RCL_OKAY, RCL_ERRED - * - * LOCKS: Expects Line Writer Lock and Unit Resource Lock to be held - * across call. - */ - -static int -erred_check_line(mr_unit_t *un, md_raidcs_t *cs, mr_column_t *column) -{ - - ASSERT(un != NULL); - ASSERT(cs->cs_flags & MD_RCS_LLOCKD); - - if (column->un_devstate & RCS_OKAY) - return (RCL_OKAY); - - if (column->un_devstate & RCS_ERRED) - return (RCL_ERRED); /* do not read from errored disk */ - - /* - * for the last errored case their are two considerations. - * When the last errored column is the only errored column then - * do treat it like a maintenance column, not doing I/O from - * it. When it there are other failures then just attempt - * to use it. - */ - if (column->un_devstate & RCS_LAST_ERRED) - return (RCL_ERRED); - - ASSERT(column->un_devstate & RCS_RESYNC); - - /* - * When a resync from a hotspare is being done (copy resync) - * then always treat it as an OKAY column, since no regen - * is required. - */ - if (column->un_devflags & MD_RAID_COPY_RESYNC) { - return (RCL_OKAY); - } - - mutex_enter(&un->un_mx); - if (cs->cs_line < un->un_resync_line_index) { - mutex_exit(&un->un_mx); - return (RCL_OKAY); - } - mutex_exit(&un->un_mx); - return (RCL_ERRED); - -} - -/* - * NAMES: raid_state_cnt - * - * DESCRIPTION: counts number of column in a specific state - * - * PARAMETERS: md_raid_t *un - * rcs_state state - */ -int -raid_state_cnt(mr_unit_t *un, rcs_state_t state) -{ - int i, retval = 0; - - for (i = 0; i < un->un_totalcolumncnt; i++) - if (un->un_column[i].un_devstate & state) - retval++; - return (retval); -} - -/* - * NAMES: raid_io_overlaps - * - * DESCRIPTION: checkst for overlap of 2 child save structures - * - * PARAMETERS: md_raidcs_t cs1 - * md_raidcs_t cs2 - * - * RETURNS: 0 - no overlap - * 1 - overlap - */ -int -raid_io_overlaps(md_raidcs_t *cs1, md_raidcs_t *cs2) -{ - if (cs1->cs_blkno > cs2->cs_lastblk) - return (0); - if (cs1->cs_lastblk < cs2->cs_blkno) - return (0); - return (1); -} - -/* - * NAMES: raid_parent_constructor - * DESCRIPTION: parent structure constructor routine - * PARAMETERS: - */ -/*ARGSUSED1*/ -static int -raid_parent_constructor(void *p, void *d1, int d2) -{ - mutex_init(&((md_raidps_t *)p)->ps_mx, - NULL, MUTEX_DEFAULT, NULL); - mutex_init(&((md_raidps_t *)p)->ps_mapin_mx, - NULL, MUTEX_DEFAULT, NULL); - return (0); -} - -void -raid_parent_init(md_raidps_t *ps) -{ - bzero(ps, offsetof(md_raidps_t, ps_mx)); - ((md_raidps_t *)ps)->ps_flags = MD_RPS_INUSE; - ((md_raidps_t *)ps)->ps_magic = RAID_PSMAGIC; -} - -/*ARGSUSED1*/ -static void -raid_parent_destructor(void *p, void *d) -{ - mutex_destroy(&((md_raidps_t *)p)->ps_mx); - mutex_destroy(&((md_raidps_t *)p)->ps_mapin_mx); -} - -/* - * NAMES: raid_child_constructor - * DESCRIPTION: child structure constructor routine - * PARAMETERS: - */ -/*ARGSUSED1*/ -static int -raid_child_constructor(void *p, void *d1, int d2) -{ - md_raidcs_t *cs = (md_raidcs_t *)p; - mutex_init(&cs->cs_mx, NULL, MUTEX_DEFAULT, NULL); - bioinit(&cs->cs_dbuf); - bioinit(&cs->cs_pbuf); - bioinit(&cs->cs_hbuf); - return (0); -} - -void -raid_child_init(md_raidcs_t *cs) -{ - bzero(cs, offsetof(md_raidcs_t, cs_mx)); - - md_bioreset(&cs->cs_dbuf); - md_bioreset(&cs->cs_pbuf); - md_bioreset(&cs->cs_hbuf); - - ((md_raidcs_t *)cs)->cs_dbuf.b_chain = - ((md_raidcs_t *)cs)->cs_pbuf.b_chain = - ((md_raidcs_t *)cs)->cs_hbuf.b_chain = - (struct buf *)(cs); - - cs->cs_magic = RAID_CSMAGIC; - cs->cs_line = MD_DISKADDR_ERROR; - cs->cs_dpwslot = -1; - cs->cs_ppwslot = -1; -} - -/*ARGSUSED1*/ -static void -raid_child_destructor(void *p, void *d) -{ - biofini(&((md_raidcs_t *)p)->cs_dbuf); - biofini(&((md_raidcs_t *)p)->cs_hbuf); - biofini(&((md_raidcs_t *)p)->cs_pbuf); - mutex_destroy(&((md_raidcs_t *)p)->cs_mx); -} - -/*ARGSUSED1*/ -static int -raid_cbuf_constructor(void *p, void *d1, int d2) -{ - bioinit(&((md_raidcbuf_t *)p)->cbuf_bp); - return (0); -} - -static void -raid_cbuf_init(md_raidcbuf_t *cb) -{ - bzero(cb, offsetof(md_raidcbuf_t, cbuf_bp)); - md_bioreset(&cb->cbuf_bp); - cb->cbuf_magic = RAID_BUFMAGIC; - cb->cbuf_pwslot = -1; - cb->cbuf_flags = CBUF_WRITE; -} - -/*ARGSUSED1*/ -static void -raid_cbuf_destructor(void *p, void *d) -{ - biofini(&((md_raidcbuf_t *)p)->cbuf_bp); -} - -/* - * NAMES: raid_run_queue - * DESCRIPTION: spawn a backend processing daemon for RAID metadevice. - * PARAMETERS: - */ -/*ARGSUSED*/ -static void -raid_run_queue(void *d) -{ - if (!(md_status & MD_GBL_DAEMONS_LIVE)) - md_daemon(1, &md_done_daemon); -} - -/* - * NAME: raid_build_pwslot - * DESCRIPTION: builds mr_pw_reserve for the column - * PARAMETERS: un is the pointer to the unit structure - * colindex is the column to create the structure for - */ -int -raid_build_pw_reservation(mr_unit_t *un, int colindex) -{ - mr_pw_reserve_t *pw; - mr_scoreboard_t *sb; - int i; - - pw = (mr_pw_reserve_t *) kmem_zalloc(sizeof (mr_pw_reserve_t) + - (sizeof (mr_scoreboard_t) * un->un_pwcnt), KM_SLEEP); - pw->pw_magic = RAID_PWMAGIC; - pw->pw_column = colindex; - pw->pw_free = un->un_pwcnt; - sb = &pw->pw_sb[0]; - for (i = 0; i < un->un_pwcnt; i++) { - sb[i].sb_column = colindex; - sb[i].sb_flags = SB_UNUSED; - sb[i].sb_start_blk = 0; - sb[i].sb_last_blk = 0; - sb[i].sb_cs = NULL; - } - un->un_column_ic[colindex].un_pw_reserve = pw; - return (0); -} -/* - * NAME: raid_free_pw_reservation - * DESCRIPTION: RAID metadevice pre-write slot structure destroy routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * int colindex - index of the column whose pre-write slot struct - * is to be destroyed. - */ -void -raid_free_pw_reservation(mr_unit_t *un, int colindex) -{ - mr_pw_reserve_t *pw = un->un_column_ic[colindex].un_pw_reserve; - - kmem_free(pw, sizeof (mr_pw_reserve_t) + - (sizeof (mr_scoreboard_t) * un->un_pwcnt)); -} - -/* - * NAME: raid_cancel_pwslot - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_cancel_pwslot(md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - mr_pw_reserve_t *pw; - mr_scoreboard_t *sb; - mr_column_ic_t *col; - md_raidcbuf_t *cbuf; - int broadcast = 0; - - if (cs->cs_ps->ps_flags & MD_RPS_READ) - return; - if (cs->cs_dpwslot != -1) { - col = &un->un_column_ic[cs->cs_dcolumn]; - pw = col->un_pw_reserve; - sb = &pw->pw_sb[cs->cs_dpwslot]; - sb->sb_flags = SB_AVAIL; - if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) - broadcast++; - sb->sb_cs = NULL; - } - - if (cs->cs_ppwslot != -1) { - col = &un->un_column_ic[cs->cs_pcolumn]; - pw = col->un_pw_reserve; - sb = &pw->pw_sb[cs->cs_ppwslot]; - sb->sb_flags = SB_AVAIL; - if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) - broadcast++; - sb->sb_cs = NULL; - } - - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { - if (cbuf->cbuf_pwslot == -1) - continue; - col = &un->un_column_ic[cbuf->cbuf_column]; - pw = col->un_pw_reserve; - sb = &pw->pw_sb[cbuf->cbuf_pwslot]; - sb->sb_flags = SB_AVAIL; - if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) - broadcast++; - sb->sb_cs = NULL; - } - if (broadcast) { - cv_broadcast(&un->un_cv); - return; - } - mutex_enter(&un->un_mx); - if (un->un_rflags & MD_RFLAG_NEEDPW) - cv_broadcast(&un->un_cv); - mutex_exit(&un->un_mx); -} - -static void -raid_free_pwinvalidate(md_raidcs_t *cs) -{ - md_raidcbuf_t *cbuf; - md_raidcbuf_t *cbuf_to_free; - mr_unit_t *un = cs->cs_un; - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - mr_pw_reserve_t *pw; - mr_scoreboard_t *sb; - int broadcast = 0; - - cbuf = cs->cs_pw_inval_list; - ASSERT(cbuf); - mutex_enter(&un->un_linlck_mx); - while (cbuf) { - pw = un->un_column_ic[cbuf->cbuf_column].un_pw_reserve; - sb = &pw->pw_sb[0]; - ASSERT(sb[cbuf->cbuf_pwslot].sb_flags & SB_INVAL_PEND); - sb[cbuf->cbuf_pwslot].sb_flags = SB_UNUSED; - sb[cbuf->cbuf_pwslot].sb_cs = NULL; - if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) - broadcast++; - cbuf_to_free = cbuf; - cbuf = cbuf->cbuf_next; - kmem_free(cbuf_to_free->cbuf_buffer, dbtob(un->un_iosize)); - kmem_cache_free(raid_cbuf_cache, cbuf_to_free); - } - cs->cs_pw_inval_list = (md_raidcbuf_t *)NULL; - /* - * now that there is a free prewrite slot, check to see if there - * are any io operations waiting first wake up the raid_io_startup - * then signal the the processes waiting in raid_write. - */ - if (ui->ui_io_lock->io_list_front) - raid_io_startup(un); - mutex_exit(&un->un_linlck_mx); - if (broadcast) { - cv_broadcast(&un->un_cv); - return; - } - mutex_enter(&un->un_mx); - if (un->un_rflags & MD_RFLAG_NEEDPW) - cv_broadcast(&un->un_cv); - mutex_exit(&un->un_mx); -} - - -static int -raid_get_pwslot(md_raidcs_t *cs, int column) -{ - mr_scoreboard_t *sb; - mr_pw_reserve_t *pw; - mr_unit_t *un = cs->cs_un; - diskaddr_t start_blk = cs->cs_blkno; - diskaddr_t last_blk = cs->cs_lastblk; - int i; - int pwcnt = un->un_pwcnt; - int avail = -1; - int use = -1; - int flags; - - - /* start with the data column */ - pw = cs->cs_un->un_column_ic[column].un_pw_reserve; - sb = &pw->pw_sb[0]; - ASSERT(pw->pw_free > 0); - for (i = 0; i < pwcnt; i++) { - flags = sb[i].sb_flags; - if (flags & SB_INVAL_PEND) - continue; - - if ((avail == -1) && (flags & (SB_AVAIL | SB_UNUSED))) - avail = i; - - if ((start_blk > sb[i].sb_last_blk) || - (last_blk < sb[i].sb_start_blk)) - continue; - - /* OVERLAP */ - ASSERT(! (sb[i].sb_flags & SB_INUSE)); - - /* - * raid_invalidate_pwslot attempts to zero out prewrite entry - * in parallel with other disk reads/writes related to current - * transaction. however cs_frags accounting for this case is - * broken because raid_write_io resets cs_frags i.e. ignoring - * that it could have been been set to > 0 value by - * raid_invalidate_pwslot. While this can be fixed an - * additional problem is that we don't seem to handle - * correctly the case of getting a disk error for prewrite - * entry invalidation. - * It does not look like we really need - * to invalidate prewrite slots because raid_replay sorts - * prewrite id's in ascending order and during recovery the - * latest prewrite entry for the same block will be replay - * last. That's why i ifdef'd out the call to - * raid_invalidate_pwslot. --aguzovsk@east - */ - - if (use == -1) { - use = i; - } - } - - ASSERT(avail != -1); - pw->pw_free--; - if (use == -1) - use = avail; - - ASSERT(! (sb[use].sb_flags & SB_INUSE)); - sb[use].sb_flags = SB_INUSE; - sb[use].sb_cs = cs; - sb[use].sb_start_blk = start_blk; - sb[use].sb_last_blk = last_blk; - ASSERT((use >= 0) && (use < un->un_pwcnt)); - return (use); -} - -static int -raid_check_pw(md_raidcs_t *cs) -{ - - mr_unit_t *un = cs->cs_un; - int i; - - ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); - /* - * check to be sure there is a prewrite slot available - * if not just return. - */ - if (cs->cs_flags & MD_RCS_LINE) { - for (i = 0; i < un->un_totalcolumncnt; i++) - if (un->un_column_ic[i].un_pw_reserve->pw_free <= 0) - return (1); - return (0); - } - - if (un->un_column_ic[cs->cs_dcolumn].un_pw_reserve->pw_free <= 0) - return (1); - if (un->un_column_ic[cs->cs_pcolumn].un_pw_reserve->pw_free <= 0) - return (1); - return (0); -} -static int -raid_alloc_pwslot(md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - md_raidcbuf_t *cbuf; - - ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); - if (raid_check_pw(cs)) - return (1); - - mutex_enter(&un->un_mx); - un->un_pwid++; - cs->cs_pwid = un->un_pwid; - mutex_exit(&un->un_mx); - - cs->cs_dpwslot = raid_get_pwslot(cs, cs->cs_dcolumn); - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { - cbuf->cbuf_pwslot = raid_get_pwslot(cs, cbuf->cbuf_column); - } - cs->cs_ppwslot = raid_get_pwslot(cs, cs->cs_pcolumn); - - cs->cs_flags |= MD_RCS_HAVE_PW_SLOTS; - - return (0); -} - -/* - * NAMES: raid_build_incore - * DESCRIPTION: RAID metadevice incore structure building routine - * PARAMETERS: void *p - pointer to a unit structure - * int snarfing - a flag to indicate snarfing is required - */ -int -raid_build_incore(void *p, int snarfing) -{ - mr_unit_t *un = (mr_unit_t *)p; - minor_t mnum = MD_SID(un); - mddb_recid_t hs_recid = 0; - int i; - int preserve_flags; - mr_column_t *column; - int iosize; - md_dev64_t hs, dev; - int resync_cnt = 0, error_cnt = 0; - - hs = NODEV64; - dev = NODEV64; - - /* clear out bogus pointer incase we return(1) prior to alloc */ - un->mr_ic = NULL; - - if (MD_STATUS(un) & MD_UN_BEING_RESET) { - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); - return (1); - } - - if (MD_UNIT(mnum) != NULL) - return (0); - - if (snarfing) - MD_STATUS(un) = 0; - - un->mr_ic = (mr_unit_ic_t *)kmem_zalloc(sizeof (*un->mr_ic), - KM_SLEEP); - - un->un_column_ic = (mr_column_ic_t *) - kmem_zalloc(sizeof (mr_column_ic_t) * - un->un_totalcolumncnt, KM_SLEEP); - - for (i = 0; i < un->un_totalcolumncnt; i++) { - - column = &un->un_column[i]; - preserve_flags = column->un_devflags & - (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC); - column->un_devflags &= - ~(MD_RAID_ALT_ISOPEN | MD_RAID_DEV_ISOPEN | - MD_RAID_WRITE_ALT); - if (raid_build_pw_reservation(un, i) != 0) { - /* could not build pwslot */ - return (1); - } - - if (snarfing) { - set_t setno = MD_MIN2SET(mnum); - dev = md_getdevnum(setno, mddb_getsidenum(setno), - column->un_orig_key, MD_NOTRUST_DEVT); - /* - * Comment out instead of remove so we have history - * In the pre-SVM releases stored devt is used so - * as long as there is one snarf is always happy - * even the component is powered off. This is not - * the case in current SVM implementation. NODEV64 - * can be returned and in this case since we resolve - * the devt at 'open' time (first use of metadevice) - * we will allow snarf continue. - * - * if (dev == NODEV64) - * return (1); - */ - - /* - * Setup un_orig_dev from device id info if the device - * is valid (not NODEV64). - */ - if (dev != NODEV64) - column->un_orig_dev = dev; - - if (column->un_devstate & RCS_RESYNC) - resync_cnt++; - if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) - error_cnt++; - - if (HOTSPARED(un, i)) { - (void) md_hot_spare_ifc(HS_MKDEV, - 0, 0, 0, &column->un_hs_id, NULL, - &hs, NULL); - /* - * Same here - * - * if (hs == NODEV64) - * return (1); - */ - } - - if (HOTSPARED(un, i)) { - if (column->un_devstate & - (RCS_OKAY | RCS_LAST_ERRED)) { - column->un_dev = hs; - column->un_pwstart = - column->un_hs_pwstart; - column->un_devstart = - column->un_hs_devstart; - preserve_flags &= - ~(MD_RAID_COPY_RESYNC | - MD_RAID_REGEN_RESYNC); - } else if (column->un_devstate & RCS_RESYNC) { - /* - * if previous system was 4.0 set - * the direction flags - */ - if ((preserve_flags & - (MD_RAID_COPY_RESYNC | - MD_RAID_REGEN_RESYNC)) == 0) { - if (column->un_alt_dev != - NODEV64) - preserve_flags |= - MD_RAID_COPY_RESYNC; - else - preserve_flags |= - /* CSTYLED */ - MD_RAID_REGEN_RESYNC; - } - } - } else { /* no hot spares */ - column->un_dev = dev; - column->un_pwstart = column->un_orig_pwstart; - column->un_devstart = column->un_orig_devstart; - if (column->un_devstate & RCS_RESYNC) { - preserve_flags |= MD_RAID_REGEN_RESYNC; - preserve_flags &= ~MD_RAID_COPY_RESYNC; - } - } - if (! (column->un_devstate & RCS_RESYNC)) { - preserve_flags &= - ~(MD_RAID_REGEN_RESYNC | - MD_RAID_COPY_RESYNC); - } - - column->un_devflags = preserve_flags; - column->un_alt_dev = NODEV64; - column->un_alt_pwstart = 0; - column->un_alt_devstart = 0; - un->un_resync_line_index = 0; - un->un_resync_index = 0; - un->un_percent_done = 0; - } - } - - if (resync_cnt && error_cnt) { - for (i = 0; i < un->un_totalcolumncnt; i++) { - column = &un->un_column[i]; - if (HOTSPARED(un, i) && - (column->un_devstate & RCS_RESYNC) && - (column->un_devflags & MD_RAID_COPY_RESYNC)) - /* hotspare has data */ - continue; - - if (HOTSPARED(un, i) && - (column->un_devstate & RCS_RESYNC)) { - /* hotspare does not have data */ - raid_hs_release(HS_FREE, un, &hs_recid, i); - column->un_dev = column->un_orig_dev; - column->un_pwstart = column->un_orig_pwstart; - column->un_devstart = column->un_orig_devstart; - mddb_setrecprivate(hs_recid, MD_PRV_PENDCOM); - } - - if (column->un_devstate & RCS_ERRED) - column->un_devstate = RCS_LAST_ERRED; - - if (column->un_devstate & RCS_RESYNC) - column->un_devstate = RCS_ERRED; - } - } - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM); - - un->un_pwid = 1; /* or some other possible value */ - un->un_magic = RAID_UNMAGIC; - iosize = un->un_iosize; - un->un_pbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); - un->un_dbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); - mutex_init(&un->un_linlck_mx, NULL, MUTEX_DEFAULT, NULL); - cv_init(&un->un_linlck_cv, NULL, CV_DEFAULT, NULL); - un->un_linlck_chn = NULL; - - /* place various information in the in-core data structures */ - md_nblocks_set(mnum, un->c.un_total_blocks); - MD_UNIT(mnum) = un; - - return (0); -} - -/* - * NAMES: reset_raid - * DESCRIPTION: RAID metadevice reset routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * minor_t mnum - RAID metadevice minor number - * int removing - a flag to imply removing device name from - * MDDB database. - */ -void -reset_raid(mr_unit_t *un, minor_t mnum, int removing) -{ - int i, n = 0; - sv_dev_t *sv; - mr_column_t *column; - int column_cnt = un->un_totalcolumncnt; - mddb_recid_t *recids, vtoc_id; - int hserr; - - ASSERT((MDI_UNIT(mnum)->ui_io_lock->io_list_front == NULL) && - (MDI_UNIT(mnum)->ui_io_lock->io_list_back == NULL)); - - md_destroy_unit_incore(mnum, &raid_md_ops); - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - if (un->un_pbuffer) { - kmem_free(un->un_pbuffer, dbtob(un->un_iosize)); - un->un_pbuffer = NULL; - } - if (un->un_dbuffer) { - kmem_free(un->un_dbuffer, dbtob(un->un_iosize)); - un->un_dbuffer = NULL; - } - - /* free all pre-write slots created during build incore */ - for (i = 0; i < un->un_totalcolumncnt; i++) - raid_free_pw_reservation(un, i); - - kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * - un->un_totalcolumncnt); - - kmem_free(un->mr_ic, sizeof (*un->mr_ic)); - - /* - * Attempt release of its minor node - */ - md_remove_minor_node(mnum); - - if (!removing) - return; - - sv = (sv_dev_t *)kmem_zalloc((column_cnt + 1) * sizeof (sv_dev_t), - KM_SLEEP); - - recids = (mddb_recid_t *) - kmem_zalloc((column_cnt + 2) * sizeof (mddb_recid_t), KM_SLEEP); - - for (i = 0; i < column_cnt; i++) { - md_unit_t *comp_un; - md_dev64_t comp_dev; - - column = &un->un_column[i]; - sv[i].setno = MD_MIN2SET(mnum); - sv[i].key = column->un_orig_key; - if (HOTSPARED(un, i)) { - if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) - hserr = HS_BAD; - else - hserr = HS_FREE; - raid_hs_release(hserr, un, &recids[n++], i); - } - /* - * deparent any metadevices. - * NOTE: currently soft partitions are the only metadevices - * allowed in RAID metadevices. - */ - comp_dev = column->un_dev; - if (md_getmajor(comp_dev) == md_major) { - comp_un = MD_UNIT(md_getminor(comp_dev)); - recids[n++] = MD_RECID(comp_un); - md_reset_parent(comp_dev); - } - } - /* decrement the reference count of the old hsp */ - if (un->un_hsp_id != -1) - (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, - &recids[n++], NULL, NULL, NULL); - recids[n] = 0; - MD_STATUS(un) |= MD_UN_BEING_RESET; - vtoc_id = un->c.un_vtoc_id; - - raid_commit(un, recids); - - /* - * Remove self from the namespace - */ - if (un->c.un_revision & MD_FN_META_DEV) { - (void) md_rem_selfname(un->c.un_self_id); - } - - /* Remove the unit structure */ - mddb_deleterec_wrapper(un->c.un_record_id); - - /* Remove the vtoc, if present */ - if (vtoc_id) - mddb_deleterec_wrapper(vtoc_id); - md_rem_names(sv, column_cnt); - kmem_free(sv, (column_cnt + 1) * sizeof (sv_dev_t)); - kmem_free(recids, (column_cnt + 2) * sizeof (mddb_recid_t)); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, - MD_MIN2SET(mnum), mnum); -} - -/* - * NAMES: raid_error_parent - * DESCRIPTION: mark a parent structure in error - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * int error - error value to set - * NOTE: (TBR) - this routine currently is not in use. - */ -static void -raid_error_parent(md_raidps_t *ps, int error) -{ - mutex_enter(&ps->ps_mx); - ps->ps_flags |= MD_RPS_ERROR; - ps->ps_error = error; - mutex_exit(&ps->ps_mx); -} - -/* - * The following defines tell raid_free_parent - * RFP_RLS_LOCK release the unit reader lock when done. - * RFP_DECR_PWFRAGS decrement ps_pwfrags - * RFP_DECR_FRAGS decrement ps_frags - * RFP_DECR_READFRAGS read keeps FRAGS and PWFRAGS in lockstep - */ -#define RFP_RLS_LOCK 0x00001 -#define RFP_DECR_PWFRAGS 0x00002 -#define RFP_DECR_FRAGS 0x00004 -#define RFP_DECR_READFRAGS (RFP_DECR_PWFRAGS | RFP_DECR_FRAGS) - -/* - * NAMES: raid_free_parent - * DESCRIPTION: free a parent structure - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * int todo - indicates what needs to be done - */ -static void -raid_free_parent(md_raidps_t *ps, int todo) -{ - mdi_unit_t *ui = ps->ps_ui; - - ASSERT(ps->ps_magic == RAID_PSMAGIC); - ASSERT(ps->ps_flags & MD_RPS_INUSE); - mutex_enter(&ps->ps_mx); - if (todo & RFP_DECR_PWFRAGS) { - ASSERT(ps->ps_pwfrags); - ps->ps_pwfrags--; - if (ps->ps_pwfrags == 0 && (! (ps->ps_flags & MD_RPS_IODONE))) { - if (ps->ps_flags & MD_RPS_ERROR) { - ps->ps_bp->b_flags |= B_ERROR; - ps->ps_bp->b_error = ps->ps_error; - } - md_kstat_done(ui, ps->ps_bp, 0); - biodone(ps->ps_bp); - ps->ps_flags |= MD_RPS_IODONE; - } - } - - if (todo & RFP_DECR_FRAGS) { - ASSERT(ps->ps_frags); - ps->ps_frags--; - } - - if (ps->ps_frags != 0) { - mutex_exit(&ps->ps_mx); - return; - } - - ASSERT((ps->ps_frags == 0) && (ps->ps_pwfrags == 0)); - mutex_exit(&ps->ps_mx); - - if (todo & RFP_RLS_LOCK) - md_io_readerexit(ui); - - if (panicstr) { - ps->ps_flags |= MD_RPS_DONE; - return; - } - - if (ps->ps_flags & MD_RPS_HSREQ) - (void) raid_hotspares(); - - ASSERT(todo & RFP_RLS_LOCK); - ps->ps_flags &= ~MD_RPS_INUSE; - - md_dec_iocount(MD_MIN2SET(ps->ps_un->c.un_self_id)); - - kmem_cache_free(raid_parent_cache, ps); -} - -/* - * NAMES: raid_free_child - * DESCRIPTION: free a parent structure - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * int drop_locks - 0 for no locks held - * NOTE: (TBR) - this routine currently is not in use. - */ -static void -raid_free_child(md_raidcs_t *cs, int drop_locks) -{ - mr_unit_t *un = cs->cs_un; - md_raidcbuf_t *cbuf, *cbuf1; - - if (cs->cs_pw_inval_list) - raid_free_pwinvalidate(cs); - - if (drop_locks) { - ASSERT(cs->cs_flags & MD_RCS_LLOCKD && - (cs->cs_flags & (MD_RCS_READER | MD_RCS_WRITER))); - md_unit_readerexit(MDI_UNIT(MD_SID(un))); - raid_line_exit(cs); - } else { - ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD)); - } - - freebuffers(cs); - cbuf = cs->cs_buflist; - while (cbuf) { - cbuf1 = cbuf->cbuf_next; - kmem_cache_free(raid_cbuf_cache, cbuf); - cbuf = cbuf1; - } - if (cs->cs_dbuf.b_flags & B_REMAPPED) - bp_mapout(&cs->cs_dbuf); - kmem_cache_free(raid_child_cache, cs); -} - -/* - * NAME: raid_regen_parity - * - * DESCRIPTION: This routine is used to regenerate the parity blocks - * for the entire raid device. It is called from - * both the regen thread and the IO path. - * - * On error the entire device is marked as in error by - * placing the erroring device in error and all other - * devices in last_errored. - * - * PARAMETERS: md_raidcs_t *cs - */ -void -raid_regen_parity(md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); - caddr_t buffer; - caddr_t parity_buffer; - buf_t *bp; - uint_t *dbuf, *pbuf; - uint_t colcnt = un->un_totalcolumncnt; - int column; - int parity_column = cs->cs_pcolumn; - size_t bcount; - int j; - - /* - * This routine uses the data and parity buffers allocated to a - * write. In the case of a read the buffers are allocated and - * freed at the end. - */ - - ASSERT(IO_READER_HELD(un)); - ASSERT(cs->cs_flags & MD_RCS_LLOCKD); - ASSERT(UNIT_READER_HELD(un)); - - if (raid_state_cnt(un, RCS_OKAY) != colcnt) - return; - - if (cs->cs_flags & MD_RCS_READER) { - getpbuffer(cs); - getdbuffer(cs); - } - ASSERT(cs->cs_dbuffer && cs->cs_pbuffer); - bcount = cs->cs_bcount; - buffer = cs->cs_dbuffer; - parity_buffer = cs->cs_pbuffer; - bzero(parity_buffer, bcount); - bp = &cs->cs_dbuf; - for (column = 0; column < colcnt; column++) { - if (column == parity_column) - continue; - reset_buf(bp, B_READ | B_BUSY, bcount); - bp->b_un.b_addr = buffer; - bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); - bp->b_lblkno = cs->cs_blkno + un->un_column[column].un_devstart; - bp->b_bcount = bcount; - bp->b_bufsize = bcount; - (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); - if (biowait(bp)) - goto bail; - pbuf = (uint_t *)(void *)parity_buffer; - dbuf = (uint_t *)(void *)buffer; - for (j = 0; j < (bcount / (sizeof (uint_t))); j++) { - *pbuf = *pbuf ^ *dbuf; - pbuf++; - dbuf++; - } - } - - reset_buf(bp, B_WRITE | B_BUSY, cs->cs_bcount); - bp->b_un.b_addr = parity_buffer; - bp->b_edev = md_dev64_to_dev(un->un_column[parity_column].un_dev); - bp->b_lblkno = cs->cs_blkno + un->un_column[parity_column].un_devstart; - bp->b_bcount = bcount; - bp->b_bufsize = bcount; - (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); - if (biowait(bp)) - goto bail; - - if (cs->cs_flags & MD_RCS_READER) { - freebuffers(cs); - cs->cs_pbuffer = NULL; - cs->cs_dbuffer = NULL; - } - bp->b_chain = (struct buf *)cs; - return; -bail: - if (cs->cs_flags & MD_RCS_READER) { - freebuffers(cs); - cs->cs_pbuffer = NULL; - cs->cs_dbuffer = NULL; - } - md_unit_readerexit(ui); - un = md_unit_writerlock(ui); - raid_set_state(un, column, RCS_ERRED, 0); - for (column = 0; column < colcnt; column++) - raid_set_state(un, column, RCS_ERRED, 0); - raid_commit(un, NULL); - md_unit_writerexit(ui); - un = md_unit_readerlock(ui); - bp->b_chain = (struct buf *)cs; -} - -/* - * NAMES: raid_error_state - * DESCRIPTION: check unit and column states' impact on I/O error - * NOTE: the state now may not be the state when the - * I/O completed due to race conditions. - * PARAMETERS: mr_unit_t *un - pointer to raid unit structure - * md_raidcs_t *cs - pointer to child structure - * buf_t *bp - pointer to buffer structure - */ -static int -raid_error_state(mr_unit_t *un, buf_t *bp) -{ - int column; - int i; - - ASSERT(IO_READER_HELD(un)); - ASSERT(UNIT_WRITER_HELD(un)); - - column = -1; - for (i = 0; i < un->un_totalcolumncnt; i++) { - if (un->un_column[i].un_dev == md_expldev(bp->b_edev)) { - column = i; - break; - } - if (un->un_column[i].un_alt_dev == md_expldev(bp->b_edev)) { - column = i; - break; - } - } - - /* in case a replace snuck in while waiting on unit writer lock */ - - if (column == -1) { - return (0); - } - - (void) raid_set_state(un, column, RCS_ERRED, 0); - ASSERT(un->un_state & (RUS_ERRED | RUS_LAST_ERRED)); - - raid_commit(un, NULL); - if (un->un_state & RUS_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } else if (un->un_state & RUS_LAST_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - return (EIO); -} - -/* - * NAME: raid_mapin_buf - * DESCRIPTION: wait for the input buffer header to be maped in - * PARAMETERS: md_raidps_t *ps - */ -static void -raid_mapin_buf(md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - - /* - * check to see if the buffer is maped. If all is ok return the - * offset of the data and return. Since it is expensive to grab - * a mutex this is only done if the mapin is not complete. - * Once the mutex is aquired it is possible that the mapin was - * not done so recheck and if necessary do the mapin. - */ - if (ps->ps_mapin > 0) { - cs->cs_addr = ps->ps_addr + cs->cs_offset; - return; - } - mutex_enter(&ps->ps_mapin_mx); - if (ps->ps_mapin > 0) { - cs->cs_addr = ps->ps_addr + cs->cs_offset; - mutex_exit(&ps->ps_mapin_mx); - return; - } - bp_mapin(ps->ps_bp); - /* - * get the new b_addr out of the parent since bp_mapin just changed it - */ - ps->ps_addr = ps->ps_bp->b_un.b_addr; - cs->cs_addr = ps->ps_addr + cs->cs_offset; - ps->ps_mapin++; - mutex_exit(&ps->ps_mapin_mx); -} - -/* - * NAMES: raid_read_no_retry - * DESCRIPTION: I/O retry routine for a RAID metadevice read - * read failed attempting to regenerate the data, - * no retry possible, error occured in raid_raidregenloop(). - * PARAMETERS: mr_unit_t *un - pointer to raid unit structure - * md_raidcs_t *cs - pointer to child structure - */ -/*ARGSUSED*/ -static void -raid_read_no_retry(mr_unit_t *un, md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - - raid_error_parent(ps, EIO); - raid_free_child(cs, 1); - - /* decrement readfrags */ - raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); -} - -/* - * NAMES: raid_read_retry - * DESCRIPTION: I/O retry routine for a RAID metadevice read - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ -static void -raid_read_retry(mr_unit_t *un, md_raidcs_t *cs) -{ - /* re-initialize the buf_t structure for raid_read() */ - cs->cs_dbuf.b_chain = (struct buf *)cs; - cs->cs_dbuf.b_back = &cs->cs_dbuf; - cs->cs_dbuf.b_forw = &cs->cs_dbuf; - cs->cs_dbuf.b_flags = B_BUSY; /* initialize flags */ - cs->cs_dbuf.b_error = 0; /* initialize error */ - cs->cs_dbuf.b_offset = -1; - /* Initialize semaphores */ - sema_init(&cs->cs_dbuf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&cs->cs_dbuf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - - cs->cs_pbuf.b_chain = (struct buf *)cs; - cs->cs_pbuf.b_back = &cs->cs_pbuf; - cs->cs_pbuf.b_forw = &cs->cs_pbuf; - cs->cs_pbuf.b_flags = B_BUSY; /* initialize flags */ - cs->cs_pbuf.b_error = 0; /* initialize error */ - cs->cs_pbuf.b_offset = -1; - sema_init(&cs->cs_pbuf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&cs->cs_pbuf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - - cs->cs_flags &= ~MD_RCS_ERROR; /* reset child error flag */ - cs->cs_flags |= MD_RCS_RECOVERY; /* set RECOVERY flag */ - - /* - * re-scheduling I/O with raid_read_io() is simpler. basically, - * raid_read_io() is invoked again with same child structure. - * (NOTE: we aren`t supposed to do any error recovery when an I/O - * error occured in raid_raidregenloop(). - */ - raid_mapin_buf(cs); - raid_read_io(un, cs); -} - -/* - * NAMES: raid_rderr - * DESCRIPTION: I/O error handling routine for a RAID metadevice read - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * LOCKS: must obtain unit writer lock while calling raid_error_state - * since a unit or column state transition may take place. - * must obtain unit reader lock to retry I/O. - */ -/*ARGSUSED*/ -static void -raid_rderr(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mdi_unit_t *ui; - mr_unit_t *un; - int error = 0; - - ps = cs->cs_ps; - ui = ps->ps_ui; - un = (mr_unit_t *)md_unit_writerlock(ui); - ASSERT(un != 0); - - if (cs->cs_dbuf.b_flags & B_ERROR) - error = raid_error_state(un, &cs->cs_dbuf); - if (cs->cs_pbuf.b_flags & B_ERROR) - error |= raid_error_state(un, &cs->cs_pbuf); - - md_unit_writerexit(ui); - - ps->ps_flags |= MD_RPS_HSREQ; - - un = (mr_unit_t *)md_unit_readerlock(ui); - ASSERT(un != 0); - /* now attempt the appropriate retry routine */ - (*(cs->cs_retry_call))(un, cs); -} - - -/* - * NAMES: raid_read_error - * DESCRIPTION: I/O error handling routine for a RAID metadevice read - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ -/*ARGSUSED*/ -static void -raid_read_error(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mdi_unit_t *ui; - mr_unit_t *un; - set_t setno; - - ps = cs->cs_ps; - ui = ps->ps_ui; - un = cs->cs_un; - - setno = MD_UN2SET(un); - - if ((cs->cs_dbuf.b_flags & B_ERROR) && - (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && - (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) - cmn_err(CE_WARN, "md %s: read error on %s", - md_shortname(MD_SID(un)), - md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); - - if ((cs->cs_pbuf.b_flags & B_ERROR) && - (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && - (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) - cmn_err(CE_WARN, "md %s: read error on %s", - md_shortname(MD_SID(un)), - md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); - - md_unit_readerexit(ui); - - ASSERT(cs->cs_frags == 0); - - /* now schedule processing for possible state change */ - daemon_request(&md_mstr_daemon, raid_rderr, - (daemon_queue_t *)cs, REQ_OLD); - -} - -/* - * NAMES: getdbuffer - * DESCRIPTION: data buffer allocation for a child structure - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * - * NOTE: always get dbuffer before pbuffer - * and get both buffers before pwslot - * otherwise a deadlock could be introduced. - */ -static void -getdbuffer(md_raidcs_t *cs) -{ - mr_unit_t *un; - - cs->cs_dbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP); - if (cs->cs_dbuffer != NULL) - return; - un = cs->cs_ps->ps_un; - mutex_enter(&un->un_mx); - while (un->un_dbuffer == NULL) { - STAT_INC(data_buffer_waits); - un->un_rflags |= MD_RFLAG_NEEDBUF; - cv_wait(&un->un_cv, &un->un_mx); - } - cs->cs_dbuffer = un->un_dbuffer; - cs->cs_flags |= MD_RCS_UNDBUF; - un->un_dbuffer = NULL; - mutex_exit(&un->un_mx); -} - -/* - * NAMES: getpbuffer - * DESCRIPTION: parity buffer allocation for a child structure - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - * - * NOTE: always get dbuffer before pbuffer - * and get both buffers before pwslot - * otherwise a deadlock could be introduced. - */ -static void -getpbuffer(md_raidcs_t *cs) -{ - mr_unit_t *un; - - cs->cs_pbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP); - if (cs->cs_pbuffer != NULL) - return; - un = cs->cs_ps->ps_un; - mutex_enter(&un->un_mx); - while (un->un_pbuffer == NULL) { - STAT_INC(parity_buffer_waits); - un->un_rflags |= MD_RFLAG_NEEDBUF; - cv_wait(&un->un_cv, &un->un_mx); - } - cs->cs_pbuffer = un->un_pbuffer; - cs->cs_flags |= MD_RCS_UNPBUF; - un->un_pbuffer = NULL; - mutex_exit(&un->un_mx); -} -static void -getresources(md_raidcs_t *cs) -{ - md_raidcbuf_t *cbuf; - /* - * NOTE: always get dbuffer before pbuffer - * and get both buffers before pwslot - * otherwise a deadlock could be introduced. - */ - getdbuffer(cs); - getpbuffer(cs); - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) - cbuf->cbuf_buffer = - kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_SLEEP); -} -/* - * NAMES: freebuffers - * DESCRIPTION: child structure buffer freeing routine - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ -static void -freebuffers(md_raidcs_t *cs) -{ - mr_unit_t *un; - md_raidcbuf_t *cbuf; - - /* free buffers used for full line write */ - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { - if (cbuf->cbuf_buffer == NULL) - continue; - kmem_free(cbuf->cbuf_buffer, cbuf->cbuf_bcount + DEV_BSIZE); - cbuf->cbuf_buffer = NULL; - cbuf->cbuf_bcount = 0; - } - - if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) { - un = cs->cs_un; - mutex_enter(&un->un_mx); - } - if (cs->cs_dbuffer) { - if (cs->cs_flags & MD_RCS_UNDBUF) - un->un_dbuffer = cs->cs_dbuffer; - else - kmem_free(cs->cs_dbuffer, cs->cs_bcount + DEV_BSIZE); - } - if (cs->cs_pbuffer) { - if (cs->cs_flags & MD_RCS_UNPBUF) - un->un_pbuffer = cs->cs_pbuffer; - else - kmem_free(cs->cs_pbuffer, cs->cs_bcount + DEV_BSIZE); - } - if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) { - un->un_rflags &= ~MD_RFLAG_NEEDBUF; - cv_broadcast(&un->un_cv); - mutex_exit(&un->un_mx); - } -} - -/* - * NAMES: raid_line_reader_lock, raid_line_writer_lock - * DESCRIPTION: RAID metadevice line reader and writer lock routines - * data column # and parity column #. - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ - -void -raid_line_reader_lock(md_raidcs_t *cs, int resync_thread) -{ - mr_unit_t *un; - md_raidcs_t *cs1; - - ASSERT(cs->cs_line != MD_DISKADDR_ERROR); - un = cs->cs_un; - cs->cs_flags |= MD_RCS_READER; - STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); - if (!panicstr) - mutex_enter(&un->un_linlck_mx); - cs1 = un->un_linlck_chn; - while (cs1 != NULL) { - for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) - if (raid_io_overlaps(cs, cs1) == 1) - if (cs1->cs_flags & MD_RCS_WRITER) - break; - - if (cs1 != NULL) { - if (panicstr) - panic("md; raid line write lock held"); - un->un_linlck_flg = 1; - cv_wait(&un->un_linlck_cv, &un->un_linlck_mx); - STAT_INC(raid_read_waits); - } - } - STAT_MAX(raid_max_reader_locks, raid_reader_locks_active); - STAT_INC(raid_reader_locks); - cs1 = un->un_linlck_chn; - if (cs1 != NULL) - cs1->cs_linlck_prev = cs; - cs->cs_linlck_next = cs1; - cs->cs_linlck_prev = NULL; - un->un_linlck_chn = cs; - cs->cs_flags |= MD_RCS_LLOCKD; - if (resync_thread) { - diskaddr_t lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; - diskaddr_t line = (lastblk + 1) / un->un_segsize; - ASSERT(raid_state_cnt(un, RCS_RESYNC)); - mutex_enter(&un->un_mx); - un->un_resync_line_index = line; - mutex_exit(&un->un_mx); - } - if (!panicstr) - mutex_exit(&un->un_linlck_mx); -} - -int -raid_line_writer_lock(md_raidcs_t *cs, int lock) -{ - mr_unit_t *un; - md_raidcs_t *cs1; - - ASSERT(cs->cs_line != MD_DISKADDR_ERROR); - cs->cs_flags |= MD_RCS_WRITER; - un = cs->cs_ps->ps_un; - - STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); - if (lock && !panicstr) - mutex_enter(&un->un_linlck_mx); - ASSERT(MUTEX_HELD(&un->un_linlck_mx)); - - cs1 = un->un_linlck_chn; - for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) - if (raid_io_overlaps(cs, cs1)) - break; - - if (cs1 != NULL) { - if (panicstr) - panic("md: line writer lock inaccessible"); - goto no_lock_exit; - } - - if (raid_alloc_pwslot(cs)) { - if (panicstr) - panic("md: no prewrite slots"); - STAT_INC(raid_prewrite_waits); - goto no_lock_exit; - } - - cs1 = un->un_linlck_chn; - if (cs1 != NULL) - cs1->cs_linlck_prev = cs; - cs->cs_linlck_next = cs1; - cs->cs_linlck_prev = NULL; - un->un_linlck_chn = cs; - cs->cs_flags |= MD_RCS_LLOCKD; - cs->cs_flags &= ~MD_RCS_WAITING; - STAT_INC(raid_writer_locks); - STAT_MAX(raid_max_write_locks, raid_write_locks_active); - if (lock && !panicstr) - mutex_exit(&un->un_linlck_mx); - return (0); - -no_lock_exit: - /* if this is already queued then do not requeue it */ - ASSERT(! (cs->cs_flags & MD_RCS_LLOCKD)); - if (!lock || (cs->cs_flags & MD_RCS_WAITING)) - return (1); - cs->cs_flags |= MD_RCS_WAITING; - cs->cs_un = un; - raid_enqueue(cs); - if (lock && !panicstr) - mutex_exit(&un->un_linlck_mx); - return (1); -} - -static void -raid_startio(md_raidcs_t *cs) -{ - mdi_unit_t *ui = cs->cs_ps->ps_ui; - mr_unit_t *un = cs->cs_un; - - un = md_unit_readerlock(ui); - raid_write_io(un, cs); -} - -void -raid_io_startup(mr_unit_t *un) -{ - md_raidcs_t *waiting_list, *cs1; - md_raidcs_t *previous = NULL, *next = NULL; - mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); - kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; - - ASSERT(MUTEX_HELD(&un->un_linlck_mx)); - mutex_enter(io_list_mutex); - - /* - * check to be sure there are no reader locks outstanding. If - * there are not then pass on the writer lock. - */ - waiting_list = ui->ui_io_lock->io_list_front; - while (waiting_list) { - ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); - ASSERT(! (waiting_list->cs_flags & MD_RCS_LLOCKD)); - for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) - if (raid_io_overlaps(waiting_list, cs1) == 1) - break; - /* - * there was an IOs that overlaps this io so go onto - * the next io in the waiting list - */ - if (cs1) { - previous = waiting_list; - waiting_list = waiting_list->cs_linlck_next; - continue; - } - - /* - * There are no IOs that overlap this, so remove it from - * the waiting queue, and start it - */ - - if (raid_check_pw(waiting_list)) { - ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); - previous = waiting_list; - waiting_list = waiting_list->cs_linlck_next; - continue; - } - ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); - - next = waiting_list->cs_linlck_next; - if (previous) - previous->cs_linlck_next = next; - else - ui->ui_io_lock->io_list_front = next; - - if (ui->ui_io_lock->io_list_front == NULL) - ui->ui_io_lock->io_list_back = NULL; - - if (ui->ui_io_lock->io_list_back == waiting_list) - ui->ui_io_lock->io_list_back = previous; - - waiting_list->cs_linlck_next = NULL; - waiting_list->cs_flags &= ~MD_RCS_WAITING; - STAT_DEC(raid_write_queue_length); - if (raid_line_writer_lock(waiting_list, 0)) - panic("region locking corrupted"); - - ASSERT(waiting_list->cs_flags & MD_RCS_LLOCKD); - daemon_request(&md_mstr_daemon, raid_startio, - (daemon_queue_t *)waiting_list, REQ_OLD); - waiting_list = next; - - } - mutex_exit(io_list_mutex); -} - -void -raid_line_exit(md_raidcs_t *cs) -{ - mr_unit_t *un; - - un = cs->cs_ps->ps_un; - STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); - mutex_enter(&un->un_linlck_mx); - if (cs->cs_flags & MD_RCS_READER) - STAT_DEC(raid_reader_locks_active); - else - STAT_DEC(raid_write_locks_active); - - if (cs->cs_linlck_prev) - cs->cs_linlck_prev->cs_linlck_next = cs->cs_linlck_next; - else - un->un_linlck_chn = cs->cs_linlck_next; - if (cs->cs_linlck_next) - cs->cs_linlck_next->cs_linlck_prev = cs->cs_linlck_prev; - - cs->cs_flags &= ~MD_RCS_LLOCKD; - - if (un->un_linlck_flg) - cv_broadcast(&un->un_linlck_cv); - - un->un_linlck_flg = 0; - cs->cs_line = MD_DISKADDR_ERROR; - - raid_cancel_pwslot(cs); - /* - * now that the lock is droped go ahead and see if there are any - * other writes that can be started up - */ - raid_io_startup(un); - - mutex_exit(&un->un_linlck_mx); -} - -/* - * NAMES: raid_line, raid_pcolumn, raid_dcolumn - * DESCRIPTION: RAID metadevice APIs for mapping segment # to line #, - * data column # and parity column #. - * PARAMETERS: int segment - segment number - * mr_unit_t *un - pointer to an unit structure - * RETURNS: raid_line returns line # - * raid_dcolumn returns data column # - * raid_pcolumn returns parity column # - */ -static diskaddr_t -raid_line(diskaddr_t segment, mr_unit_t *un) -{ - diskaddr_t adj_seg; - diskaddr_t line; - diskaddr_t max_orig_segment; - - max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; - if (segment >= max_orig_segment) { - adj_seg = segment - max_orig_segment; - line = adj_seg % un->un_segsincolumn; - } else { - line = segment / (un->un_origcolumncnt - 1); - } - return (line); -} - -uint_t -raid_dcolumn(diskaddr_t segment, mr_unit_t *un) -{ - diskaddr_t adj_seg; - diskaddr_t line; - diskaddr_t max_orig_segment; - uint_t column; - - max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; - if (segment >= max_orig_segment) { - adj_seg = segment - max_orig_segment; - column = un->un_origcolumncnt + - (uint_t)(adj_seg / un->un_segsincolumn); - } else { - line = segment / (un->un_origcolumncnt - 1); - column = (uint_t)((segment % - (un->un_origcolumncnt - 1) + line) % un->un_origcolumncnt); - } - return (column); -} - -uint_t -raid_pcolumn(diskaddr_t segment, mr_unit_t *un) -{ - diskaddr_t adj_seg; - diskaddr_t line; - diskaddr_t max_orig_segment; - uint_t column; - - max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; - if (segment >= max_orig_segment) { - adj_seg = segment - max_orig_segment; - line = adj_seg % un->un_segsincolumn; - } else { - line = segment / (un->un_origcolumncnt - 1); - } - column = (uint_t)((line + (un->un_origcolumncnt - 1)) % - un->un_origcolumncnt); - return (column); -} - - -/* - * Is called in raid_iosetup to probe each column to insure - * that all the columns are in 'okay' state and meet the - * 'full line' requirement. If any column is in error, - * we don't want to enable the 'full line' flag. Previously, - * we would do so and disable it only when a error is - * detected after the first 'full line' io which is too late - * and leads to the potential data corruption. - */ -static int -raid_check_cols(mr_unit_t *un) -{ - buf_t bp; - char *buf; - mr_column_t *colptr; - minor_t mnum = MD_SID(un); - int i; - int err = 0; - - buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP); - - for (i = 0; i < un->un_totalcolumncnt; i++) { - md_dev64_t tmpdev; - - colptr = &un->un_column[i]; - - tmpdev = colptr->un_dev; - /* - * Open by device id - * If this device is hotspared - * use the hotspare key - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ? - colptr->un_hs_key : colptr->un_orig_key); - - if (tmpdev == NODEV64) { - err = 1; - break; - } - - colptr->un_dev = tmpdev; - - bzero((caddr_t)&bp, sizeof (buf_t)); - bp.b_back = &bp; - bp.b_forw = &bp; - bp.b_flags = (B_READ | B_BUSY); - sema_init(&bp.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bp.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - bp.b_edev = md_dev64_to_dev(colptr->un_dev); - bp.b_lblkno = colptr->un_pwstart; - bp.b_bcount = DEV_BSIZE; - bp.b_bufsize = DEV_BSIZE; - bp.b_un.b_addr = (caddr_t)buf; - (void) md_call_strategy(&bp, 0, NULL); - if (biowait(&bp)) { - err = 1; - break; - } - } - - kmem_free(buf, DEV_BSIZE); - return (err); -} - -/* - * NAME: raid_iosetup - * DESCRIPTION: RAID metadevice specific I/O set up routine which does - * all the necessary calculations to determine the location - * of the segement for the I/O. - * PARAMETERS: mr_unit_t *un - unit number of RAID metadevice - * diskaddr_t blkno - block number of the I/O attempt - * size_t blkcnt - block count for this I/O - * md_raidcs_t *cs - child structure for each segmented I/O - * - * NOTE: The following is an example of a raid disk layer out: - * - * Total Column = 5 - * Original Column = 4 - * Segment Per Column = 10 - * - * Col#0 Col#1 Col#2 Col#3 Col#4 Col#5 Col#6 - * ------------------------------------------------------------- - * line#0 Seg#0 Seg#1 Seg#2 Parity Seg#30 Seg#40 - * line#1 Parity Seg#3 Seg#4 Seg#5 Seg#31 - * line#2 Seg#8 Parity Seg#6 Seg#7 Seg#32 - * line#3 Seg#10 Seg#11 Parity Seg#9 Seg#33 - * line#4 Seg#12 Seg#13 Seg#14 Parity Seg#34 - * line#5 Parity Seg#15 Seg#16 Seg#17 Seg#35 - * line#6 Seg#20 Parity Seg#18 Seg#19 Seg#36 - * line#7 Seg#22 Seg#23 Parity Seg#21 Seg#37 - * line#8 Seg#24 Seg#25 Seg#26 Parity Seg#38 - * line#9 Parity Seg#27 Seg#28 Seg#29 Seg#39 - */ -static size_t -raid_iosetup( - mr_unit_t *un, - diskaddr_t blkno, - size_t blkcnt, - md_raidcs_t *cs -) -{ - diskaddr_t segment; - diskaddr_t segstart; - diskaddr_t segoff; - size_t leftover; - diskaddr_t line; - uint_t iosize; - uint_t colcnt; - - /* caculate the segment# and offset for the block */ - segment = blkno / un->un_segsize; - segstart = segment * un->un_segsize; - segoff = blkno - segstart; - iosize = un->un_iosize - 1; - colcnt = un->un_totalcolumncnt - 1; - line = raid_line(segment, un); - cs->cs_dcolumn = raid_dcolumn(segment, un); - cs->cs_pcolumn = raid_pcolumn(segment, un); - cs->cs_dflags = un->un_column[cs->cs_dcolumn].un_devflags; - cs->cs_pflags = un->un_column[cs->cs_pcolumn].un_devflags; - cs->cs_line = line; - - if ((cs->cs_ps->ps_flags & MD_RPS_WRITE) && - (UNIT_STATE(un) & RCS_OKAY) && - (segoff == 0) && - (un->un_totalcolumncnt == un->un_origcolumncnt) && - (un->un_segsize < un->un_iosize) && - (un->un_iosize <= un->un_maxio) && - (blkno == line * un->un_segsize * colcnt) && - (blkcnt >= ((un->un_totalcolumncnt -1) * un->un_segsize)) && - (raid_state_cnt(un, RCS_OKAY) == un->un_origcolumncnt) && - (raid_check_cols(un) == 0)) { - - md_raidcbuf_t **cbufp; - md_raidcbuf_t *cbuf; - int i, j; - - STAT_INC(raid_full_line_writes); - leftover = blkcnt - (un->un_segsize * colcnt); - ASSERT(blkcnt >= (un->un_segsize * colcnt)); - cs->cs_blkno = line * un->un_segsize; - cs->cs_blkcnt = un->un_segsize; - cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; - cs->cs_bcount = dbtob(cs->cs_blkcnt); - cs->cs_flags |= MD_RCS_LINE; - - cbufp = &cs->cs_buflist; - for (i = 0; i < un->un_totalcolumncnt; i++) { - j = cs->cs_dcolumn + i; - j = j % un->un_totalcolumncnt; - - if ((j == cs->cs_dcolumn) || (j == cs->cs_pcolumn)) - continue; - cbuf = kmem_cache_alloc(raid_cbuf_cache, - MD_ALLOCFLAGS); - raid_cbuf_init(cbuf); - cbuf->cbuf_un = cs->cs_un; - cbuf->cbuf_ps = cs->cs_ps; - cbuf->cbuf_column = j; - cbuf->cbuf_bcount = dbtob(un->un_segsize); - *cbufp = cbuf; - cbufp = &cbuf->cbuf_next; - } - return (leftover); - } - - leftover = blkcnt - (un->un_segsize - segoff); - if (blkcnt > (un->un_segsize - segoff)) - blkcnt -= leftover; - else - leftover = 0; - - if (blkcnt > (size_t)iosize) { - leftover += (blkcnt - iosize); - blkcnt = iosize; - } - - /* calculate the line# and column# for the segment */ - cs->cs_flags &= ~MD_RCS_LINE; - cs->cs_blkno = line * un->un_segsize + segoff; - cs->cs_blkcnt = (uint_t)blkcnt; - cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; - cs->cs_bcount = dbtob((uint_t)blkcnt); - return (leftover); -} - -/* - * NAME: raid_done - * DESCRIPTION: RAID metadevice I/O done interrupt routine - * PARAMETERS: struct buf *bp - pointer to a buffer structure - */ -static void -raid_done(struct buf *bp) -{ - md_raidcs_t *cs; - int flags, frags; - - sema_v(&bp->b_io); - cs = (md_raidcs_t *)bp->b_chain; - - ASSERT(cs != NULL); - - mutex_enter(&cs->cs_mx); - if (bp->b_flags & B_ERROR) { - cs->cs_flags |= MD_RCS_ERROR; - cs->cs_flags &= ~(MD_RCS_ISCALL); - } - - flags = cs->cs_flags; - frags = --cs->cs_frags; - mutex_exit(&cs->cs_mx); - if (frags != 0) { - return; - } - - if (flags & MD_RCS_ERROR) { - if (cs->cs_error_call) { - daemon_request(&md_done_daemon, cs->cs_error_call, - (daemon_queue_t *)cs, REQ_OLD); - } - return; - } - - if (flags & MD_RCS_ISCALL) { - cs->cs_flags &= ~(MD_RCS_ISCALL); - (*(cs->cs_call))(cs); - return; - } - daemon_request(&md_done_daemon, cs->cs_call, - (daemon_queue_t *)cs, REQ_OLD); -} -/* - * the flag RIO_EXTRA is used when dealing with a column in the process - * of being resynced. During the resync, writes may have to take place - * on both the original component and a hotspare component. - */ -#define RIO_DATA 0x00100 /* use data buffer & data column */ -#define RIO_PARITY 0x00200 /* use parity buffer & parity column */ -#define RIO_WRITE 0x00400 /* issue a write */ -#define RIO_READ 0x00800 /* issue a read */ -#define RIO_PWIO 0x01000 /* do the I/O to the prewrite entry */ -#define RIO_ALT 0x02000 /* do write to alternate device */ -#define RIO_EXTRA 0x04000 /* use extra buffer */ - -#define RIO_COLMASK 0x000ff - -#define RIO_PREWRITE RIO_WRITE | RIO_PWIO - -/* - * NAME: raidio - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raidio(md_raidcs_t *cs, int flags) -{ - buf_t *bp; - int column; - int flag; - void *private; - mr_unit_t *un; - int iosize; - diskaddr_t pwstart; - diskaddr_t devstart; - md_dev64_t dev; - - un = cs->cs_un; - - ASSERT(IO_READER_HELD(un)); - ASSERT(UNIT_READER_HELD(un)); - - if (flags & RIO_DATA) { - if (flags & RIO_EXTRA) - bp = &cs->cs_hbuf; - else - bp = &cs->cs_dbuf; - bp->b_un.b_addr = cs->cs_dbuffer; - column = cs->cs_dcolumn; - } else { - if (flags & RIO_EXTRA) - bp = &cs->cs_hbuf; - else - bp = &cs->cs_pbuf; - bp->b_un.b_addr = cs->cs_pbuffer; - column = cs->cs_pcolumn; - } - if (flags & RIO_COLMASK) - column = (flags & RIO_COLMASK) - 1; - - bp->b_bcount = cs->cs_bcount; - bp->b_bufsize = cs->cs_bcount; - iosize = un->un_iosize; - - /* check if the hotspared device will be used */ - if (flags & RIO_ALT && (flags & RIO_WRITE)) { - pwstart = un->un_column[column].un_alt_pwstart; - devstart = un->un_column[column].un_alt_devstart; - dev = un->un_column[column].un_alt_dev; - } else { - pwstart = un->un_column[column].un_pwstart; - devstart = un->un_column[column].un_devstart; - dev = un->un_column[column].un_dev; - } - - /* if not writing to log skip log header */ - if ((flags & RIO_PWIO) == 0) { - bp->b_lblkno = devstart + cs->cs_blkno; - bp->b_un.b_addr += DEV_BSIZE; - } else { - bp->b_bcount += DEV_BSIZE; - bp->b_bufsize = bp->b_bcount; - if (flags & RIO_DATA) { - bp->b_lblkno = cs->cs_dpwslot * iosize + pwstart; - } else { /* not DATA -> PARITY */ - bp->b_lblkno = cs->cs_ppwslot * iosize + pwstart; - } - } - - bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR | nv_available); - bp->b_flags |= B_BUSY; - if (flags & RIO_READ) { - bp->b_flags |= B_READ; - } else { - bp->b_flags |= B_WRITE; - if ((nv_available && nv_parity && (flags & RIO_PARITY)) || - (nv_available && nv_prewrite && (flags & RIO_PWIO))) - bp->b_flags |= nv_available; - } - bp->b_iodone = (int (*)())raid_done; - bp->b_edev = md_dev64_to_dev(dev); - - ASSERT((bp->b_edev != 0) && (bp->b_edev != NODEV)); - - private = cs->cs_strategy_private; - flag = cs->cs_strategy_flag; - - md_call_strategy(bp, flag, private); -} - -/* - * NAME: genstandardparity - * DESCRIPTION: This routine - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -genstandardparity(md_raidcs_t *cs) -{ - uint_t *dbuf, *pbuf; - size_t wordcnt; - uint_t dsum = 0; - uint_t psum = 0; - - ASSERT((cs->cs_bcount & 0x3) == 0); - - wordcnt = cs->cs_bcount / sizeof (uint_t); - - dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - - /* Word aligned */ - if (((uintptr_t)cs->cs_addr & 0x3) == 0) { - uint_t *uwbuf = (uint_t *)(void *)(cs->cs_addr); - uint_t uval; - - while (wordcnt--) { - uval = *uwbuf++; - psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ uval)); - ++pbuf; - *dbuf = uval; - dsum ^= uval; - ++dbuf; - } - } else { - uchar_t *ubbuf = (uchar_t *)(cs->cs_addr); - union { - uint_t wb; - uchar_t bb[4]; - } cb; - - while (wordcnt--) { - cb.bb[0] = *ubbuf++; - cb.bb[1] = *ubbuf++; - cb.bb[2] = *ubbuf++; - cb.bb[3] = *ubbuf++; - psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ cb.wb)); - ++pbuf; - *dbuf = cb.wb; - dsum ^= cb.wb; - ++dbuf; - } - } - - RAID_FILLIN_RPW(cs->cs_dbuffer, cs->cs_un, dsum, cs->cs_pcolumn, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - 2, cs->cs_dcolumn, RAID_PWMAGIC); - - RAID_FILLIN_RPW(cs->cs_pbuffer, cs->cs_un, psum, cs->cs_dcolumn, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - 2, cs->cs_pcolumn, RAID_PWMAGIC); -} - -static void -genlineparity(md_raidcs_t *cs) -{ - - mr_unit_t *un = cs->cs_un; - md_raidcbuf_t *cbuf; - uint_t *pbuf, *dbuf; - uint_t *uwbuf; - uchar_t *ubbuf; - size_t wordcnt; - uint_t psum = 0, dsum = 0; - size_t count = un->un_segsize * DEV_BSIZE; - uint_t col; - buf_t *bp; - - ASSERT((cs->cs_bcount & 0x3) == 0); - - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); - uwbuf = (uint_t *)(void *)(cs->cs_addr); - ubbuf = (uchar_t *)(void *)(cs->cs_addr); - - wordcnt = count / sizeof (uint_t); - - /* Word aligned */ - if (((uintptr_t)cs->cs_addr & 0x3) == 0) { - uint_t uval; - - while (wordcnt--) { - uval = *uwbuf++; - *dbuf = uval; - *pbuf = uval; - dsum ^= uval; - ++pbuf; - ++dbuf; - } - } else { - union { - uint_t wb; - uchar_t bb[4]; - } cb; - - while (wordcnt--) { - cb.bb[0] = *ubbuf++; - cb.bb[1] = *ubbuf++; - cb.bb[2] = *ubbuf++; - cb.bb[3] = *ubbuf++; - *dbuf = cb.wb; - *pbuf = cb.wb; - dsum ^= cb.wb; - ++pbuf; - ++dbuf; - } - } - - RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, cs->cs_pcolumn, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - un->un_totalcolumncnt, cs->cs_dcolumn, RAID_PWMAGIC); - - raidio(cs, RIO_PREWRITE | RIO_DATA); - - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { - - dsum = 0; - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - dbuf = (uint_t *)(void *)(cbuf->cbuf_buffer + DEV_BSIZE); - - wordcnt = count / sizeof (uint_t); - - col = cbuf->cbuf_column; - - /* Word aligned */ - if (((uintptr_t)cs->cs_addr & 0x3) == 0) { - uint_t uval; - - /* - * Only calculate psum when working on the last - * data buffer. - */ - if (cbuf->cbuf_next == NULL) { - psum = 0; - while (wordcnt--) { - uval = *uwbuf++; - *dbuf = uval; - psum ^= (*pbuf ^= uval); - dsum ^= uval; - ++dbuf; - ++pbuf; - } - } else { - while (wordcnt--) { - uval = *uwbuf++; - *dbuf = uval; - *pbuf ^= uval; - dsum ^= uval; - ++dbuf; - ++pbuf; - } - } - } else { - union { - uint_t wb; - uchar_t bb[4]; - } cb; - - /* - * Only calculate psum when working on the last - * data buffer. - */ - if (cbuf->cbuf_next == NULL) { - psum = 0; - while (wordcnt--) { - cb.bb[0] = *ubbuf++; - cb.bb[1] = *ubbuf++; - cb.bb[2] = *ubbuf++; - cb.bb[3] = *ubbuf++; - *dbuf = cb.wb; - psum ^= (*pbuf ^= cb.wb); - dsum ^= cb.wb; - ++dbuf; - ++pbuf; - } - } else { - while (wordcnt--) { - cb.bb[0] = *ubbuf++; - cb.bb[1] = *ubbuf++; - cb.bb[2] = *ubbuf++; - cb.bb[3] = *ubbuf++; - *dbuf = cb.wb; - *pbuf ^= cb.wb; - dsum ^= cb.wb; - ++dbuf; - ++pbuf; - } - } - } - RAID_FILLIN_RPW(cbuf->cbuf_buffer, un, dsum, cs->cs_pcolumn, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - un->un_totalcolumncnt, col, RAID_PWMAGIC); - - /* - * fill in buffer for write to prewrite area - */ - bp = &cbuf->cbuf_bp; - bp->b_un.b_addr = cbuf->cbuf_buffer; - bp->b_bcount = cbuf->cbuf_bcount + DEV_BSIZE; - bp->b_bufsize = bp->b_bcount; - bp->b_lblkno = (cbuf->cbuf_pwslot * un->un_iosize) + - un->un_column[col].un_pwstart; - bp->b_flags = B_WRITE | B_BUSY; - if (nv_available && nv_prewrite) - bp->b_flags |= nv_available; - bp->b_iodone = (int (*)())raid_done; - bp->b_edev = md_dev64_to_dev(un->un_column[col].un_dev); - bp->b_chain = (struct buf *)cs; - md_call_strategy(bp, - cs->cs_strategy_flag, cs->cs_strategy_private); - } - - RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, cs->cs_dcolumn, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - un->un_totalcolumncnt, cs->cs_pcolumn, RAID_PWMAGIC); - - raidio(cs, RIO_PREWRITE | RIO_PARITY); -} - -/* - * NAME: raid_readregenloop - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_readregenloop(md_raidcs_t *cs) -{ - mr_unit_t *un; - md_raidps_t *ps; - uint_t *dbuf; - uint_t *pbuf; - size_t wordcnt; - - un = cs->cs_un; - - /* - * XOR the parity with data bytes, must skip the - * pre-write entry header in all data/parity buffers - */ - wordcnt = cs->cs_bcount / sizeof (uint_t); - dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - while (wordcnt--) - *dbuf++ ^= *pbuf++; - - /* bump up the loop count */ - cs->cs_loop++; - - /* skip the errored component */ - if (cs->cs_loop == cs->cs_dcolumn) - cs->cs_loop++; - - if (cs->cs_loop != un->un_totalcolumncnt) { - cs->cs_frags = 1; - raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1)); - return; - } - /* reaching the end sof loop */ - ps = cs->cs_ps; - bcopy(cs->cs_dbuffer + DEV_BSIZE, cs->cs_addr, cs->cs_bcount); - raid_free_child(cs, 1); - - /* decrement readfrags */ - raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); -} - -/* - * NAME: raid_read_io - * DESCRIPTION: RAID metadevice read I/O routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_read_io(mr_unit_t *un, md_raidcs_t *cs) -{ - int flag; - void *private; - buf_t *bp; - buf_t *pb = cs->cs_ps->ps_bp; - mr_column_t *column; - - flag = cs->cs_strategy_flag; - private = cs->cs_strategy_private; - column = &un->un_column[cs->cs_dcolumn]; - - /* - * The component to be read is good, simply set up bp structure - * and call low level md routine doing the read. - */ - - if (COLUMN_ISOKAY(un, cs->cs_dcolumn) || - (COLUMN_ISLASTERR(un, cs->cs_dcolumn) && - (cs->cs_flags & MD_RCS_RECOVERY) == 0)) { - dev_t ddi_dev; /* needed for bioclone, so not md_dev64_t */ - ddi_dev = md_dev64_to_dev(column->un_dev); - - bp = &cs->cs_dbuf; - bp = md_bioclone(pb, cs->cs_offset, cs->cs_bcount, ddi_dev, - column->un_devstart + cs->cs_blkno, - (int (*)())raid_done, bp, KM_NOSLEEP); - - bp->b_chain = (buf_t *)cs; - - cs->cs_frags = 1; - cs->cs_error_call = raid_read_error; - cs->cs_retry_call = raid_read_retry; - cs->cs_flags |= MD_RCS_ISCALL; - cs->cs_stage = RAID_READ_DONE; - cs->cs_call = raid_stage; - - ASSERT(bp->b_edev != 0); - - md_call_strategy(bp, flag, private); - return; - } - - /* - * The component to be read is bad, have to go through - * raid specific method to read data from other members. - */ - cs->cs_loop = 0; - /* - * NOTE: always get dbuffer before pbuffer - * and get both buffers before pwslot - * otherwise a deadlock could be introduced. - */ - raid_mapin_buf(cs); - getdbuffer(cs); - getpbuffer(cs); - if (cs->cs_loop == cs->cs_dcolumn) - cs->cs_loop++; - - /* zero out data buffer for use as a data sink */ - bzero(cs->cs_dbuffer + DEV_BSIZE, cs->cs_bcount); - cs->cs_stage = RAID_NONE; - cs->cs_call = raid_readregenloop; - cs->cs_error_call = raid_read_error; - cs->cs_retry_call = raid_read_no_retry; - cs->cs_frags = 1; - - /* use parity buffer to read other columns */ - raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1)); -} - -/* - * NAME: raid_read - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ -static int -raid_read(mr_unit_t *un, md_raidcs_t *cs) -{ - int error = 0; - md_raidps_t *ps; - mdi_unit_t *ui; - minor_t mnum; - - ASSERT(IO_READER_HELD(un)); - ps = cs->cs_ps; - ui = ps->ps_ui; - raid_line_reader_lock(cs, 0); - un = (mr_unit_t *)md_unit_readerlock(ui); - ASSERT(UNIT_STATE(un) != RUS_INIT); - mnum = MD_SID(un); - cs->cs_un = un; - - /* make sure the read doesn't go beyond the end of the column */ - if (cs->cs_blkno + cs->cs_blkcnt > - un->un_segsize * un->un_segsincolumn) { - error = ENXIO; - } - if (error) - goto rerror; - - if (un->un_state & RUS_REGEN) { - raid_regen_parity(cs); - un = MD_UNIT(mnum); - cs->cs_un = un; - } - - raid_read_io(un, cs); - return (0); - -rerror: - raid_error_parent(ps, error); - raid_free_child(cs, 1); - /* decrement readfrags */ - raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); - return (0); -} - -/* - * NAME: raid_write_err_retry - * DESCRIPTION: RAID metadevice write retry routine - * write was for parity or data only; - * complete write with error, no recovery possible - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ -/*ARGSUSED*/ -static void -raid_write_err_retry(mr_unit_t *un, md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - int flags = RFP_DECR_FRAGS | RFP_RLS_LOCK; - - /* decrement pwfrags if needed, and frags */ - if (!(cs->cs_flags & MD_RCS_PWDONE)) - flags |= RFP_DECR_PWFRAGS; - raid_error_parent(ps, EIO); - raid_free_child(cs, 1); - raid_free_parent(ps, flags); -} - -/* - * NAME: raid_write_err_retry - * DESCRIPTION: RAID metadevice write retry routine - * write is too far along to retry and parent - * has already been signaled with iodone. - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ -/*ARGSUSED*/ -static void -raid_write_no_retry(mr_unit_t *un, md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - int flags = RFP_DECR_FRAGS | RFP_RLS_LOCK; - - /* decrement pwfrags if needed, and frags */ - if (!(cs->cs_flags & MD_RCS_PWDONE)) - flags |= RFP_DECR_PWFRAGS; - raid_free_child(cs, 1); - raid_free_parent(ps, flags); -} - -/* - * NAME: raid_write_retry - * DESCRIPTION: RAID metadevice write retry routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_write_retry(mr_unit_t *un, md_raidcs_t *cs) -{ - md_raidps_t *ps; - - ps = cs->cs_ps; - - /* re-initialize the buf_t structure for raid_write() */ - cs->cs_dbuf.b_chain = (struct buf *)cs; - cs->cs_dbuf.b_back = &cs->cs_dbuf; - cs->cs_dbuf.b_forw = &cs->cs_dbuf; - cs->cs_dbuf.b_flags = B_BUSY; /* initialize flags */ - cs->cs_dbuf.b_error = 0; /* initialize error */ - cs->cs_dbuf.b_offset = -1; - /* Initialize semaphores */ - sema_init(&cs->cs_dbuf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&cs->cs_dbuf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - - cs->cs_pbuf.b_chain = (struct buf *)cs; - cs->cs_pbuf.b_back = &cs->cs_pbuf; - cs->cs_pbuf.b_forw = &cs->cs_pbuf; - cs->cs_pbuf.b_flags = B_BUSY; /* initialize flags */ - cs->cs_pbuf.b_error = 0; /* initialize error */ - cs->cs_pbuf.b_offset = -1; - sema_init(&cs->cs_pbuf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&cs->cs_pbuf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - - cs->cs_hbuf.b_chain = (struct buf *)cs; - cs->cs_hbuf.b_back = &cs->cs_hbuf; - cs->cs_hbuf.b_forw = &cs->cs_hbuf; - cs->cs_hbuf.b_flags = B_BUSY; /* initialize flags */ - cs->cs_hbuf.b_error = 0; /* initialize error */ - cs->cs_hbuf.b_offset = -1; - sema_init(&cs->cs_hbuf.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&cs->cs_hbuf.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - - cs->cs_flags &= ~(MD_RCS_ERROR); - /* - * If we have already done'ed the i/o but have done prewrite - * on this child, then reset PWDONE flag and bump pwfrags before - * restarting i/o. - * If pwfrags is zero, we have already 'iodone'd the i/o so - * leave things alone. We don't want to re-'done' it. - */ - mutex_enter(&ps->ps_mx); - if (cs->cs_flags & MD_RCS_PWDONE) { - cs->cs_flags &= ~MD_RCS_PWDONE; - ps->ps_pwfrags++; - } - mutex_exit(&ps->ps_mx); - raid_write_io(un, cs); -} - -/* - * NAME: raid_wrerr - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - * LOCKS: must obtain unit writer lock while calling raid_error_state - * since a unit or column state transition may take place. - * must obtain unit reader lock to retry I/O. - */ -static void -raid_wrerr(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mdi_unit_t *ui; - mr_unit_t *un; - md_raidcbuf_t *cbuf; - - ps = cs->cs_ps; - ui = ps->ps_ui; - - un = (mr_unit_t *)md_unit_writerlock(ui); - ASSERT(un != 0); - - if (cs->cs_dbuf.b_flags & B_ERROR) - (void) raid_error_state(un, &cs->cs_dbuf); - if (cs->cs_pbuf.b_flags & B_ERROR) - (void) raid_error_state(un, &cs->cs_pbuf); - if (cs->cs_hbuf.b_flags & B_ERROR) - (void) raid_error_state(un, &cs->cs_hbuf); - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) - if (cbuf->cbuf_bp.b_flags & B_ERROR) - (void) raid_error_state(un, &cbuf->cbuf_bp); - - md_unit_writerexit(ui); - - ps->ps_flags |= MD_RPS_HSREQ; - - un = (mr_unit_t *)md_unit_readerlock(ui); - - /* now attempt the appropriate retry routine */ - (*(cs->cs_retry_call))(un, cs); -} -/* - * NAMES: raid_write_error - * DESCRIPTION: I/O error handling routine for a RAID metadevice write - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ -/*ARGSUSED*/ -static void -raid_write_error(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mdi_unit_t *ui; - mr_unit_t *un; - md_raidcbuf_t *cbuf; - set_t setno; - - ps = cs->cs_ps; - ui = ps->ps_ui; - un = cs->cs_un; - - setno = MD_UN2SET(un); - - /* - * locate each buf that is in error on this io and then - * output an error message - */ - if ((cs->cs_dbuf.b_flags & B_ERROR) && - (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && - (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) - cmn_err(CE_WARN, "md %s: write error on %s", - md_shortname(MD_SID(un)), - md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); - - if ((cs->cs_pbuf.b_flags & B_ERROR) && - (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && - (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) - cmn_err(CE_WARN, "md %s: write error on %s", - md_shortname(MD_SID(un)), - md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); - - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) - if ((cbuf->cbuf_bp.b_flags & B_ERROR) && - (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_ERRED) && - (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_LAST_ERRED)) - cmn_err(CE_WARN, "md %s: write error on %s", - md_shortname(MD_SID(un)), - md_devname(setno, md_expldev(cbuf->cbuf_bp.b_edev), - NULL, 0)); - - md_unit_readerexit(ui); - - ASSERT(cs->cs_frags == 0); - - /* now schedule processing for possible state change */ - daemon_request(&md_mstr_daemon, raid_wrerr, - (daemon_queue_t *)cs, REQ_OLD); - -} - -/* - * NAME: raid_write_ponly - * DESCRIPTION: RAID metadevice write routine - * in the case where only the parity column can be written - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_write_ponly(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mr_unit_t *un = cs->cs_un; - - ps = cs->cs_ps; - /* decrement pwfrags if needed, but not frags */ - ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); - raid_free_parent(ps, RFP_DECR_PWFRAGS); - cs->cs_flags |= MD_RCS_PWDONE; - cs->cs_frags = 1; - cs->cs_stage = RAID_WRITE_PONLY_DONE; - cs->cs_call = raid_stage; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_no_retry; - if (WRITE_ALT(un, cs->cs_pcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_WRITE); - } - raidio(cs, RIO_PARITY | RIO_WRITE); -} - -/* - * NAME: raid_write_ploop - * DESCRIPTION: RAID metadevice write routine, constructs parity from - * data in other columns. - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_write_ploop(md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - uint_t *dbuf; - uint_t *pbuf; - size_t wordcnt; - uint_t psum = 0; - - wordcnt = cs->cs_bcount / sizeof (uint_t); - dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - while (wordcnt--) - *pbuf++ ^= *dbuf++; - cs->cs_loop++; - - /* - * build parity from scratch using new data, - * skip reading the data and parity columns. - */ - while (cs->cs_loop == cs->cs_dcolumn || cs->cs_loop == cs->cs_pcolumn) - cs->cs_loop++; - - if (cs->cs_loop != un->un_totalcolumncnt) { - cs->cs_frags = 1; - raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); - return; - } - - /* construct checksum for parity buffer */ - wordcnt = cs->cs_bcount / sizeof (uint_t); - pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); - while (wordcnt--) { - psum ^= *pbuf; - pbuf++; - } - RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, -1, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - 1, cs->cs_pcolumn, RAID_PWMAGIC); - - cs->cs_stage = RAID_NONE; - cs->cs_call = raid_write_ponly; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_err_retry; - cs->cs_frags = 1; - if (WRITE_ALT(un, cs->cs_pcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); - } - raidio(cs, RIO_PARITY | RIO_PREWRITE); -} - -/* - * NAME: raid_write_donly - * DESCRIPTION: RAID metadevice write routine - * Completed writing data to prewrite entry - * in the case where only the data column can be written - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_write_donly(md_raidcs_t *cs) -{ - md_raidps_t *ps; - mr_unit_t *un = cs->cs_un; - - ps = cs->cs_ps; - /* WARNING: don't release unit reader lock here... */ - /* decrement pwfrags if needed, but not frags */ - ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); - raid_free_parent(ps, RFP_DECR_PWFRAGS); - cs->cs_flags |= MD_RCS_PWDONE; - cs->cs_frags = 1; - cs->cs_stage = RAID_WRITE_DONLY_DONE; - cs->cs_call = raid_stage; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_err_retry; - if (WRITE_ALT(un, cs->cs_dcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); - } - raidio(cs, RIO_DATA | RIO_WRITE); -} - -/* - * NAME: raid_write_got_old - * DESCRIPTION: RAID metadevice write routine - * completed read of old data and old parity - * PARAMETERS: md_raidcs_t *cs - pointer to a child structure - */ -static void -raid_write_got_old(md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - - ASSERT(IO_READER_HELD(cs->cs_un)); - ASSERT(UNIT_READER_HELD(cs->cs_un)); - - raid_mapin_buf(cs); - genstandardparity(cs); - cs->cs_frags = 2; - cs->cs_call = raid_stage; - cs->cs_stage = RAID_PREWRITE_DONE; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_retry; - - if (WRITE_ALT(un, cs->cs_dcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_PREWRITE); - } - - if (WRITE_ALT(un, cs->cs_pcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); - } - ASSERT(cs->cs_frags < 4); - raidio(cs, RIO_DATA | RIO_PREWRITE); - raidio(cs, RIO_PARITY | RIO_PREWRITE); -} - -/* - * NAME: raid_write_io - * DESCRIPTION: RAID metadevice write I/O routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ - -/*ARGSUSED*/ -static void -raid_write_io(mr_unit_t *un, md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - uint_t *dbuf; - uint_t *ubuf; - size_t wordcnt; - uint_t dsum = 0; - int pcheck; - int dcheck; - - ASSERT((un->un_column[cs->cs_pcolumn].un_devstate & - RCS_INIT) == 0); - ASSERT((un->un_column[cs->cs_dcolumn].un_devstate & - RCS_INIT) == 0); - ASSERT(IO_READER_HELD(un)); - ASSERT(UNIT_READER_HELD(un)); - ASSERT(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS); - if (cs->cs_flags & MD_RCS_LINE) { - - mr_unit_t *un = cs->cs_un; - - ASSERT(un->un_origcolumncnt == un->un_totalcolumncnt); - raid_mapin_buf(cs); - cs->cs_frags = un->un_origcolumncnt; - cs->cs_call = raid_stage; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_no_retry; - cs->cs_stage = RAID_LINE_PWDONE; - genlineparity(cs); - return; - } - - pcheck = erred_check_line(un, cs, &un->un_column[cs->cs_pcolumn]); - dcheck = erred_check_line(un, cs, &un->un_column[cs->cs_dcolumn]); - cs->cs_resync_check = pcheck << RCL_PARITY_OFFSET || dcheck; - - if (pcheck == RCL_ERRED && dcheck == RCL_ERRED) { - int err = EIO; - - if ((un->un_column[cs->cs_pcolumn].un_devstate == - RCS_LAST_ERRED) || - (un->un_column[cs->cs_dcolumn].un_devstate == - RCS_LAST_ERRED)) - err = ENXIO; - raid_error_parent(ps, err); - ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); - raid_free_child(cs, 1); - raid_free_parent(ps, RFP_DECR_FRAGS - | RFP_RLS_LOCK | RFP_DECR_PWFRAGS); - return; - } - - if (pcheck & RCL_ERRED) { - /* - * handle case of only having data drive - */ - raid_mapin_buf(cs); - wordcnt = cs->cs_bcount / sizeof (uint_t); - - dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); - ubuf = (uint_t *)(void *)(cs->cs_addr); - - while (wordcnt--) { - *dbuf = *ubuf; - dsum ^= *ubuf; - dbuf++; - ubuf++; - } - RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, -1, - cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, - 1, cs->cs_dcolumn, RAID_PWMAGIC); - cs->cs_frags = 1; - cs->cs_stage = RAID_NONE; - cs->cs_call = raid_write_donly; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_err_retry; - if (WRITE_ALT(un, cs->cs_dcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_DATA | RIO_ALT | RIO_EXTRA | - RIO_PREWRITE); - } - raidio(cs, RIO_DATA | RIO_PREWRITE); - return; - } - - if (dcheck & RCL_ERRED) { - /* - * handle case of only having parity drive - * build parity from scratch using new data, - * skip reading the data and parity columns. - */ - raid_mapin_buf(cs); - cs->cs_loop = 0; - while (cs->cs_loop == cs->cs_dcolumn || - cs->cs_loop == cs->cs_pcolumn) - cs->cs_loop++; - - /* copy new data in to begin building parity */ - bcopy(cs->cs_addr, cs->cs_pbuffer + DEV_BSIZE, cs->cs_bcount); - cs->cs_stage = RAID_NONE; - cs->cs_call = raid_write_ploop; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_err_retry; - cs->cs_frags = 1; - raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); - return; - } - /* - * handle normal cases - * read old data and old parity - */ - cs->cs_frags = 2; - cs->cs_stage = RAID_NONE; - cs->cs_call = raid_write_got_old; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_retry; - ASSERT(ps->ps_magic == RAID_PSMAGIC); - raidio(cs, RIO_DATA | RIO_READ); - raidio(cs, RIO_PARITY | RIO_READ); -} - -static void -raid_enqueue(md_raidcs_t *cs) -{ - mdi_unit_t *ui = cs->cs_ps->ps_ui; - kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; - md_raidcs_t *cs1; - - mutex_enter(io_list_mutex); - ASSERT(! (cs->cs_flags & MD_RCS_LLOCKD)); - if (ui->ui_io_lock->io_list_front == NULL) { - ui->ui_io_lock->io_list_front = cs; - ui->ui_io_lock->io_list_back = cs; - } else { - cs1 = ui->ui_io_lock->io_list_back; - cs1->cs_linlck_next = cs; - ui->ui_io_lock->io_list_back = cs; - } - STAT_INC(raid_write_waits); - STAT_MAX(raid_max_write_q_length, raid_write_queue_length); - cs->cs_linlck_next = NULL; - mutex_exit(io_list_mutex); -} - -/* - * NAME: raid_write - * DESCRIPTION: RAID metadevice write routine - * PARAMETERS: mr_unit_t *un - pointer to a unit structure - * md_raidcs_t *cs - pointer to a child structure - */ - -/*ARGSUSED*/ -static int -raid_write(mr_unit_t *un, md_raidcs_t *cs) -{ - int error = 0; - md_raidps_t *ps; - mdi_unit_t *ui; - minor_t mnum; - - ASSERT(IO_READER_HELD(un)); - ps = cs->cs_ps; - ui = ps->ps_ui; - - ASSERT(UNIT_STATE(un) != RUS_INIT); - if (UNIT_STATE(un) == RUS_LAST_ERRED) - error = EIO; - - /* make sure the write doesn't go beyond the column */ - if (cs->cs_blkno + cs->cs_blkcnt > un->un_segsize * un->un_segsincolumn) - error = ENXIO; - if (error) - goto werror; - - getresources(cs); - - /* - * this is an advisory loop that keeps the waiting lists short - * to reduce cpu time. Since there is a race introduced by not - * aquiring all the correct mutexes, use a cv_timedwait to be - * sure the write always will wake up and start. - */ - while (raid_check_pw(cs)) { - mutex_enter(&un->un_mx); - un->un_rflags |= MD_RFLAG_NEEDPW; - STAT_INC(raid_prewrite_waits); - (void) cv_reltimedwait(&un->un_cv, &un->un_mx, md_wr_wait, - TR_CLOCK_TICK); - un->un_rflags &= ~MD_RFLAG_NEEDPW; - mutex_exit(&un->un_mx); - } - - if (raid_line_writer_lock(cs, 1)) - return (0); - - un = (mr_unit_t *)md_unit_readerlock(ui); - cs->cs_un = un; - mnum = MD_SID(un); - - if (un->un_state & RUS_REGEN) { - raid_regen_parity(cs); - un = MD_UNIT(mnum); - cs->cs_un = un; - } - - raid_write_io(un, cs); - return (0); -werror: - /* aquire unit reader lock sinc raid_free_child always drops it */ - raid_error_parent(ps, error); - raid_free_child(cs, 0); - /* decrement both pwfrags and frags */ - raid_free_parent(ps, RFP_DECR_PWFRAGS | RFP_DECR_FRAGS | RFP_RLS_LOCK); - return (0); -} - - -/* - * NAMES: raid_stage - * DESCRIPTION: post-processing routine for a RAID metadevice - * PARAMETERS: md_raidcs_t *cs - pointer to child structure - */ -static void -raid_stage(md_raidcs_t *cs) -{ - md_raidps_t *ps = cs->cs_ps; - mr_unit_t *un = cs->cs_un; - md_raidcbuf_t *cbuf; - buf_t *bp; - void *private; - int flag; - - switch (cs->cs_stage) { - case RAID_READ_DONE: - raid_free_child(cs, 1); - /* decrement readfrags */ - raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); - return; - - case RAID_WRITE_DONE: - case RAID_WRITE_PONLY_DONE: - case RAID_WRITE_DONLY_DONE: - /* - * Completed writing real parity and/or data. - */ - ASSERT(cs->cs_flags & MD_RCS_PWDONE); - raid_free_child(cs, 1); - /* decrement frags but not pwfrags */ - raid_free_parent(ps, RFP_DECR_FRAGS | RFP_RLS_LOCK); - return; - - case RAID_PREWRITE_DONE: - /* - * completed writing data and parity to prewrite entries - */ - /* - * WARNING: don't release unit reader lock here.. - * decrement pwfrags but not frags - */ - raid_free_parent(ps, RFP_DECR_PWFRAGS); - cs->cs_flags |= MD_RCS_PWDONE; - cs->cs_frags = 2; - cs->cs_stage = RAID_WRITE_DONE; - cs->cs_call = raid_stage; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_no_retry; - if (WRITE_ALT(un, cs->cs_pcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | - RIO_WRITE); - } - if (WRITE_ALT(un, cs->cs_dcolumn)) { - cs->cs_frags++; - raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); - } - ASSERT(cs->cs_frags < 4); - raidio(cs, RIO_DATA | RIO_WRITE); - raidio(cs, RIO_PARITY | RIO_WRITE); - if (cs->cs_pw_inval_list) { - raid_free_pwinvalidate(cs); - } - return; - - case RAID_LINE_PWDONE: - ASSERT(cs->cs_frags == 0); - raid_free_parent(ps, RFP_DECR_PWFRAGS); - cs->cs_flags |= MD_RCS_PWDONE; - cs->cs_frags = un->un_origcolumncnt; - cs->cs_call = raid_stage; - cs->cs_error_call = raid_write_error; - cs->cs_retry_call = raid_write_no_retry; - cs->cs_stage = RAID_WRITE_DONE; - for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { - /* - * fill in buffer for write to prewrite area - */ - bp = &cbuf->cbuf_bp; - bp->b_back = bp; - bp->b_forw = bp; - bp->b_un.b_addr = cbuf->cbuf_buffer + DEV_BSIZE; - bp->b_bcount = cbuf->cbuf_bcount; - bp->b_bufsize = cbuf->cbuf_bcount; - bp->b_lblkno = - un->un_column[cbuf->cbuf_column].un_devstart + - cs->cs_blkno; - bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR); - bp->b_flags &= ~nv_available; - bp->b_flags |= B_WRITE | B_BUSY; - bp->b_iodone = (int (*)())raid_done; - bp->b_edev = md_dev64_to_dev( - un->un_column[cbuf->cbuf_column].un_dev); - bp->b_chain = (struct buf *)cs; - private = cs->cs_strategy_private; - flag = cs->cs_strategy_flag; - md_call_strategy(bp, flag, private); - } - raidio(cs, RIO_DATA | RIO_WRITE); - raidio(cs, RIO_PARITY | RIO_WRITE); - if (cs->cs_pw_inval_list) { - raid_free_pwinvalidate(cs); - } - return; - - default: - ASSERT(0); - break; - } -} -/* - * NAME: md_raid_strategy - * DESCRIPTION: RAID metadevice I/O oprations entry point. - * PARAMETERS: buf_t *pb - pointer to a user I/O buffer - * int flag - metadevice specific flag - * void *private - carry over flag ?? - * - */ - -void -md_raid_strategy(buf_t *pb, int flag, void *private) -{ - md_raidps_t *ps; - md_raidcs_t *cs; - int doing_writes; - int err; - mr_unit_t *un; - mdi_unit_t *ui; - size_t count; - diskaddr_t blkno; - caddr_t addr; - off_t offset; - int colcnt; - minor_t mnum; - set_t setno; - - ui = MDI_UNIT(getminor(pb->b_edev)); - md_kstat_waitq_enter(ui); - un = (mr_unit_t *)md_io_readerlock(ui); - setno = MD_MIN2SET(getminor(pb->b_edev)); - - if ((flag & MD_NOBLOCK) == 0) { - if (md_inc_iocount(setno) != 0) { - pb->b_flags |= B_ERROR; - pb->b_error = ENXIO; - pb->b_resid = pb->b_bcount; - md_kstat_waitq_exit(ui); - md_io_readerexit(ui); - biodone(pb); - return; - } - } else { - md_inc_iocount_noblock(setno); - } - - mnum = MD_SID(un); - colcnt = un->un_totalcolumncnt - 1; - count = pb->b_bcount; - - STAT_CHECK(raid_512, count == 512); - STAT_CHECK(raid_1024, count == 1024); - STAT_CHECK(raid_1024_8192, count > 1024 && count < 8192); - STAT_CHECK(raid_8192, count == 8192); - STAT_CHECK(raid_8192_bigger, count > 8192); - - (void *) md_unit_readerlock(ui); - if (!(flag & MD_STR_NOTTOP)) { - err = md_checkbuf(ui, (md_unit_t *)un, pb); /* check and map */ - if (err != 0) { - md_kstat_waitq_exit(ui); - md_io_readerexit(ui); - return; - } - } - md_unit_readerexit(ui); - - STAT_INC(raid_total_io); - - /* allocate a parent structure for the user I/O */ - ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS); - raid_parent_init(ps); - - /* - * Save essential information from the original buffhdr - * in the md_save structure. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = pb; - ps->ps_addr = pb->b_un.b_addr; - - if ((pb->b_flags & B_READ) == 0) { - ps->ps_flags |= MD_RPS_WRITE; - doing_writes = 1; - STAT_INC(raid_writes); - } else { - ps->ps_flags |= MD_RPS_READ; - doing_writes = 0; - STAT_INC(raid_reads); - } - - count = lbtodb(pb->b_bcount); /* transfer count (in blocks) */ - blkno = pb->b_lblkno; /* block number on device */ - addr = 0; - offset = 0; - ps->ps_pwfrags = 1; - ps->ps_frags = 1; - md_kstat_waitq_to_runq(ui); - - do { - cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS); - raid_child_init(cs); - cs->cs_ps = ps; - cs->cs_un = un; - cs->cs_mdunit = mnum; - cs->cs_strategy_flag = flag; - cs->cs_strategy_private = private; - cs->cs_addr = addr; - cs->cs_offset = offset; - count = raid_iosetup(un, blkno, count, cs); - if (cs->cs_flags & MD_RCS_LINE) { - blkno += (cs->cs_blkcnt * colcnt); - offset += (cs->cs_bcount * colcnt); - } else { - blkno += cs->cs_blkcnt; - offset += cs->cs_bcount; - } - /* for each cs bump up the ps_pwfrags and ps_frags fields */ - if (count) { - mutex_enter(&ps->ps_mx); - ps->ps_pwfrags++; - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - if (doing_writes) - (void) raid_write(un, cs); - else - (void) raid_read(un, cs); - } - } while (count); - if (doing_writes) { - (void) raid_write(un, cs); - } else - (void) raid_read(un, cs); - - if (! (flag & MD_STR_NOTTOP) && panicstr) { - while (! (ps->ps_flags & MD_RPS_DONE)) { - md_daemon(1, &md_done_daemon); - drv_usecwait(10); - } - kmem_cache_free(raid_parent_cache, ps); - } -} - -/* - * NAMES: raid_snarf - * DESCRIPTION: RAID metadevice SNARF entry point - * PARAMETERS: md_snarfcmd_t cmd, - * set_t setno - * RETURNS: - */ -static int -raid_snarf(md_snarfcmd_t cmd, set_t setno) -{ - mr_unit_t *un; - mddb_recid_t recid; - int gotsomething; - int all_raid_gotten; - mddb_type_t typ1; - uint_t ncol; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - size_t newreqsize; - mr_unit_t *big_un; - mr_unit32_od_t *small_un; - - - if (cmd == MD_SNARF_CLEANUP) - return (0); - - all_raid_gotten = 1; - gotsomething = 0; - typ1 = (mddb_type_t)md_getshared_key(setno, - raid_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) { - continue; - } - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_RAID; - rbp = dep->de_rb; - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - /* - * This means, we have an old and small record - * and this record hasn't already been - * converted. Before we create an incore - * metadevice from this we have to convert it to - * a big record. - */ - small_un = - (mr_unit32_od_t *)mddb_getrecaddr(recid); - ncol = small_un->un_totalcolumncnt; - newreqsize = sizeof (mr_unit_t) + - ((ncol - 1) * sizeof (mr_column_t)); - big_un = (mr_unit_t *)kmem_zalloc(newreqsize, - KM_SLEEP); - raid_convert((caddr_t)small_un, (caddr_t)big_un, - SMALL_2_BIG); - kmem_free(small_un, dep->de_reqsize); - dep->de_rb_userdata = big_un; - dep->de_reqsize = newreqsize; - un = big_un; - rbp->rb_private |= MD_PRV_CONVD; - } else { - /* - * Record has already been converted. Just - * get its address. - */ - un = (mr_unit_t *)mddb_getrecaddr(recid); - } - un->c.un_revision &= ~MD_64BIT_META_DEV; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - /* Big device */ - un = (mr_unit_t *)mddb_getrecaddr(recid); - un->c.un_revision |= MD_64BIT_META_DEV; - un->c.un_flag |= MD_EFILABEL; - break; - } - MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); - - /* - * Create minor device node for snarfed entry. - */ - (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); - - if (MD_UNIT(MD_SID(un)) != NULL) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - all_raid_gotten = 0; - if (raid_build_incore((void *)un, 1) == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - md_create_unit_incore(MD_SID(un), &raid_md_ops, 1); - gotsomething = 1; - } else if (un->mr_ic) { - kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * - un->un_totalcolumncnt); - kmem_free(un->mr_ic, sizeof (*un->mr_ic)); - } - } - - if (!all_raid_gotten) { - return (gotsomething); - } - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - return (0); -} - -/* - * NAMES: raid_halt - * DESCRIPTION: RAID metadevice HALT entry point - * PARAMETERS: md_haltcmd_t cmd - - * set_t setno - - * RETURNS: - */ -static int -raid_halt(md_haltcmd_t cmd, set_t setno) -{ - set_t i; - mdi_unit_t *ui; - minor_t mnum; - - if (cmd == MD_HALT_CLOSE) - return (0); - - if (cmd == MD_HALT_OPEN) - return (0); - - if (cmd == MD_HALT_UNLOAD) - return (0); - - if (cmd == MD_HALT_CHECK) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != raid_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) - return (1); - } - return (0); - } - - if (cmd != MD_HALT_DOIT) - return (1); - - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != raid_md_ops.md_selfindex) - continue; - reset_raid((mr_unit_t *)MD_UNIT(mnum), mnum, 0); - } - return (0); -} - -/* - * NAMES: raid_close_all_devs - * DESCRIPTION: Close all the devices of the unit. - * PARAMETERS: mr_unit_t *un - pointer to unit structure - * RETURNS: - */ -void -raid_close_all_devs(mr_unit_t *un, int init_pw, int md_cflags) -{ - int i; - mr_column_t *device; - - for (i = 0; i < un->un_totalcolumncnt; i++) { - device = &un->un_column[i]; - if (device->un_devflags & MD_RAID_DEV_ISOPEN) { - ASSERT((device->un_dev != (md_dev64_t)0) && - (device->un_dev != NODEV64)); - if ((device->un_devstate & RCS_OKAY) && init_pw) - (void) init_pw_area(un, device->un_dev, - device->un_pwstart, i); - md_layered_close(device->un_dev, md_cflags); - device->un_devflags &= ~MD_RAID_DEV_ISOPEN; - } - } -} - -/* - * NAMES: raid_open_all_devs - * DESCRIPTION: Open all the components (columns) of the device unit. - * PARAMETERS: mr_unit_t *un - pointer to unit structure - * RETURNS: - */ -static int -raid_open_all_devs(mr_unit_t *un, int md_oflags) -{ - minor_t mnum = MD_SID(un); - int i; - int not_opened = 0; - int commit = 0; - int col = -1; - mr_column_t *device; - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - mdkey_t key; - mdi_unit_t *ui = MDI_UNIT(mnum); - - ui->ui_tstate &= ~MD_INACCESSIBLE; - - for (i = 0; i < un->un_totalcolumncnt; i++) { - md_dev64_t tmpdev; - - device = &un->un_column[i]; - - if (COLUMN_STATE(un, i) & RCS_ERRED) { - not_opened++; - continue; - } - - if (device->un_devflags & MD_RAID_DEV_ISOPEN) - continue; - - tmpdev = device->un_dev; - /* - * Open by device id - */ - key = HOTSPARED(un, i) ? - device->un_hs_key : device->un_orig_key; - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, key) == 1) { - tmpdev = md_resolve_bydevid(mnum, tmpdev, key); - } - if (md_layered_open(mnum, &tmpdev, md_oflags)) { - device->un_dev = tmpdev; - not_opened++; - continue; - } - device->un_dev = tmpdev; - device->un_devflags |= MD_RAID_DEV_ISOPEN; - } - - /* if open errors and errored devices are 1 then device can run */ - if (not_opened > 1) { - cmn_err(CE_WARN, - "md: %s failed to open. open error on %s\n", - md_shortname(MD_SID(un)), - md_devname(MD_UN2SET(un), device->un_orig_dev, NULL, 0)); - - ui->ui_tstate |= MD_INACCESSIBLE; - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - return (not_opened > 1); - } - - for (i = 0; i < un->un_totalcolumncnt; i++) { - device = &un->un_column[i]; - if (device->un_devflags & MD_RAID_DEV_ISOPEN) { - if (device->un_devstate & RCS_LAST_ERRED) { - /* - * At this point in time there is a possibility - * that errors were the result of a controller - * failure with more than a single column on it - * so clear out last errored columns and let errors - * re-occur is necessary. - */ - raid_set_state(un, i, RCS_OKAY, 0); - commit++; - } - continue; - } - ASSERT(col == -1); - col = i; - } - - if (col != -1) { - raid_set_state(un, col, RCS_ERRED, 0); - commit++; - } - - if (commit) - raid_commit(un, NULL); - - if (col != -1) { - if (COLUMN_STATE(un, col) & RCS_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } - } - - return (0); -} - -/* - * NAMES: raid_internal_open - * DESCRIPTION: Do the actual RAID open - * PARAMETERS: minor_t mnum - minor number of the RAID device - * int flag - - * int otyp - - * int md_oflags - RAID open flags - * RETURNS: 0 if successful, nonzero otherwise - */ -int -raid_internal_open(minor_t mnum, int flag, int otyp, int md_oflags) -{ - mr_unit_t *un; - mdi_unit_t *ui; - int err = 0; - int replay_error = 0; - - ui = MDI_UNIT(mnum); - ASSERT(ui != NULL); - - un = (mr_unit_t *)md_unit_openclose_enter(ui); - /* - * this MUST be checked before md_unit_isopen is checked. - * raid_init_columns sets md_unit_isopen to block reset, halt. - */ - if ((UNIT_STATE(un) & (RUS_INIT | RUS_DOI)) && - !(md_oflags & MD_OFLG_ISINIT)) { - md_unit_openclose_exit(ui); - return (EAGAIN); - } - - if ((md_oflags & MD_OFLG_ISINIT) || md_unit_isopen(ui)) { - err = md_unit_incopen(mnum, flag, otyp); - goto out; - } - - md_unit_readerexit(ui); - - un = (mr_unit_t *)md_unit_writerlock(ui); - if (raid_open_all_devs(un, md_oflags) == 0) { - if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) { - md_unit_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - raid_close_all_devs(un, 0, md_oflags); - goto out; - } - } else { - /* - * if this unit contains more than two errored components - * should return error and close all opened devices - */ - - md_unit_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - raid_close_all_devs(un, 0, md_oflags); - md_unit_openclose_exit(ui); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (ENXIO); - } - - if (!(MD_STATUS(un) & MD_UN_REPLAYED)) { - replay_error = raid_replay(un); - MD_STATUS(un) |= MD_UN_REPLAYED; - } - - md_unit_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - - if ((replay_error == RAID_RPLY_READONLY) && - ((flag & (FREAD | FWRITE)) == FREAD)) { - md_unit_openclose_exit(ui); - return (0); - } - - /* allocate hotspare if possible */ - (void) raid_hotspares(); - - -out: - md_unit_openclose_exit(ui); - return (err); -} -/* - * NAMES: raid_open - * DESCRIPTION: RAID metadevice OPEN entry point - * PARAMETERS: dev_t dev - - * int flag - - * int otyp - - * cred_t * cred_p - - * int md_oflags - - * RETURNS: - */ -/*ARGSUSED1*/ -static int -raid_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) -{ - int error = 0; - - if (error = raid_internal_open(getminor(*dev), flag, otyp, md_oflags)) { - return (error); - } - return (0); -} - -/* - * NAMES: raid_internal_close - * DESCRIPTION: RAID metadevice CLOSE actual implementation - * PARAMETERS: minor_t - minor number of the RAID device - * int otyp - - * int init_pw - - * int md_cflags - RAID close flags - * RETURNS: 0 if successful, nonzero otherwise - */ -/*ARGSUSED*/ -int -raid_internal_close(minor_t mnum, int otyp, int init_pw, int md_cflags) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - mr_unit_t *un; - int err = 0; - - /* single thread */ - un = (mr_unit_t *)md_unit_openclose_enter(ui); - - /* count closed */ - if ((err = md_unit_decopen(mnum, otyp)) != 0) - goto out; - /* close devices, if necessary */ - if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { - raid_close_all_devs(un, init_pw, md_cflags); - } - - /* unlock, return success */ -out: - md_unit_openclose_exit(ui); - return (err); -} - -/* - * NAMES: raid_close - * DESCRIPTION: RAID metadevice close entry point - * PARAMETERS: dev_t dev - - * int flag - - * int otyp - - * cred_t * cred_p - - * int md_oflags - - * RETURNS: - */ -/*ARGSUSED1*/ -static int -raid_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags) -{ - int retval; - - (void) md_io_writerlock(MDI_UNIT(getminor(dev))); - retval = raid_internal_close(getminor(dev), otyp, 1, md_cflags); - (void) md_io_writerexit(MDI_UNIT(getminor(dev))); - return (retval); -} - -/* - * raid_probe_close_all_devs - */ -void -raid_probe_close_all_devs(mr_unit_t *un) -{ - int i; - mr_column_t *device; - - for (i = 0; i < un->un_totalcolumncnt; i++) { - device = &un->un_column[i]; - - if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { - md_layered_close(device->un_dev, - MD_OFLG_PROBEDEV); - device->un_devflags &= ~MD_RAID_DEV_PROBEOPEN; - } - } -} -/* - * Raid_probe_dev: - * - * On entry the unit writerlock is held - */ -static int -raid_probe_dev(mdi_unit_t *ui, minor_t mnum) -{ - mr_unit_t *un; - int i; - int not_opened = 0; - int commit = 0; - int col = -1; - mr_column_t *device; - int md_devopen = 0; - - if (md_unit_isopen(ui)) - md_devopen++; - - un = MD_UNIT(mnum); - /* - * If the state has been set to LAST_ERRED because - * of an error when the raid device was open at some - * point in the past, don't probe. We really don't want - * to reset the state in this case. - */ - if (UNIT_STATE(un) == RUS_LAST_ERRED) - return (0); - - ui->ui_tstate &= ~MD_INACCESSIBLE; - - for (i = 0; i < un->un_totalcolumncnt; i++) { - md_dev64_t tmpdev; - - device = &un->un_column[i]; - if (COLUMN_STATE(un, i) & RCS_ERRED) { - not_opened++; - continue; - } - - tmpdev = device->un_dev; - /* - * Currently the flags passed are not needed since - * there cannot be an underlying metadevice. However - * they are kept here for consistency. - * - * Open by device id - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i)? - device->un_hs_key : device->un_orig_key); - if (md_layered_open(mnum, &tmpdev, - MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV)) { - device->un_dev = tmpdev; - not_opened++; - continue; - } - device->un_dev = tmpdev; - - device->un_devflags |= MD_RAID_DEV_PROBEOPEN; - } - - /* - * The code below is careful on setting the LAST_ERRED state. - * - * If open errors and exactly one device has failed we can run. - * If more then one device fails we have to figure out when to set - * LAST_ERRED state. The rationale is to avoid unnecessary resyncs - * since they are painful and time consuming. - * - * When more than one component/column fails there are 2 scenerios. - * - * 1. Metadevice has NOT been opened: In this case, the behavior - * mimics the open symantics. ie. Only the first failed device - * is ERRED and LAST_ERRED is not set. - * - * 2. Metadevice has been opened: Here the read/write sematics are - * followed. The first failed devicce is ERRED and on the next - * failed device LAST_ERRED is set. - */ - - if (not_opened > 1 && !md_devopen) { - cmn_err(CE_WARN, - "md: %s failed to open. open error on %s\n", - md_shortname(MD_SID(un)), - md_devname(MD_UN2SET(un), device->un_orig_dev, NULL, 0)); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - raid_probe_close_all_devs(un); - ui->ui_tstate |= MD_INACCESSIBLE; - return (not_opened > 1); - } - - if (!md_devopen) { - for (i = 0; i < un->un_totalcolumncnt; i++) { - device = &un->un_column[i]; - if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { - if (device->un_devstate & RCS_LAST_ERRED) { - /* - * At this point in time there is a - * possibility that errors were the - * result of a controller failure with - * more than a single column on it so - * clear out last errored columns and - * let errors re-occur is necessary. - */ - raid_set_state(un, i, RCS_OKAY, 0); - commit++; - } - continue; - } - ASSERT(col == -1); - /* - * note if multiple devices are failing then only - * the last one is marked as error - */ - col = i; - } - - if (col != -1) { - raid_set_state(un, col, RCS_ERRED, 0); - commit++; - } - - } else { - for (i = 0; i < un->un_totalcolumncnt; i++) { - device = &un->un_column[i]; - - /* if we have LAST_ERRED go ahead and commit. */ - if (un->un_state & RUS_LAST_ERRED) - break; - /* - * could not open the component - */ - - if (!(device->un_devflags & MD_RAID_DEV_PROBEOPEN)) { - col = i; - raid_set_state(un, col, RCS_ERRED, 0); - commit++; - } - } - } - - if (commit) - raid_commit(un, NULL); - - if (col != -1) { - if (COLUMN_STATE(un, col) & RCS_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } - } - - raid_probe_close_all_devs(un); - return (0); -} - -static int -raid_imp_set( - set_t setno -) -{ - mddb_recid_t recid; - int i, gotsomething; - mddb_type_t typ1; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - mr_unit_t *un64; - mr_unit32_od_t *un32; - md_dev64_t self_devt; - minor_t *self_id; /* minor needs to be updated */ - md_parent_t *parent_id; /* parent needs to be updated */ - mddb_recid_t *record_id; /* record id needs to be updated */ - hsp_t *hsp_id; - - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - raid_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * Small device - */ - un32 = (mr_unit32_od_t *)mddb_getrecaddr(recid); - self_id = &(un32->c.un_self_id); - parent_id = &(un32->c.un_parent); - record_id = &(un32->c.un_record_id); - hsp_id = &(un32->un_hsp_id); - - for (i = 0; i < un32->un_totalcolumncnt; i++) { - mr_column32_od_t *device; - - device = &un32->un_column[i]; - if (!md_update_minor(setno, mddb_getsidenum - (setno), device->un_orig_key)) - goto out; - - if (device->un_hs_id != 0) - device->un_hs_id = - MAKERECID(setno, device->un_hs_id); - } - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - un64 = (mr_unit_t *)mddb_getrecaddr(recid); - self_id = &(un64->c.un_self_id); - parent_id = &(un64->c.un_parent); - record_id = &(un64->c.un_record_id); - hsp_id = &(un64->un_hsp_id); - - for (i = 0; i < un64->un_totalcolumncnt; i++) { - mr_column_t *device; - - device = &un64->un_column[i]; - if (!md_update_minor(setno, mddb_getsidenum - (setno), device->un_orig_key)) - goto out; - - if (device->un_hs_id != 0) - device->un_hs_id = - MAKERECID(setno, device->un_hs_id); - } - break; - } - - /* - * If this is a top level and a friendly name metadevice, - * update its minor in the namespace. - */ - if ((*parent_id == MD_NO_PARENT) && - ((rbp->rb_revision == MDDB_REV_RBFN) || - (rbp->rb_revision == MDDB_REV_RB64FN))) { - - self_devt = md_makedevice(md_major, *self_id); - if (!md_update_top_device_minor(setno, - mddb_getsidenum(setno), self_devt)) - goto out; - } - - /* - * Update unit with the imported setno - */ - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); - - if (*hsp_id != -1) - *hsp_id = MAKERECID(setno, DBID(*hsp_id)); - - if (*parent_id != MD_NO_PARENT) - *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); - *record_id = MAKERECID(setno, DBID(*record_id)); - gotsomething = 1; - } - -out: - return (gotsomething); -} - -static md_named_services_t raid_named_services[] = { - {raid_hotspares, "poke hotspares" }, - {raid_rename_check, MDRNM_CHECK }, - {raid_rename_lock, MDRNM_LOCK }, - {(intptr_t (*)()) raid_rename_unlock, MDRNM_UNLOCK }, - {(intptr_t (*)()) raid_probe_dev, "probe open test" }, - {NULL, 0 } -}; - -md_ops_t raid_md_ops = { - raid_open, /* open */ - raid_close, /* close */ - md_raid_strategy, /* strategy */ - NULL, /* print */ - NULL, /* dump */ - NULL, /* read */ - NULL, /* write */ - md_raid_ioctl, /* ioctl, */ - raid_snarf, /* raid_snarf */ - raid_halt, /* raid_halt */ - NULL, /* aread */ - NULL, /* awrite */ - raid_imp_set, /* import set */ - raid_named_services -}; - -static void -init_init() -{ - /* default to a second */ - if (md_wr_wait == 0) - md_wr_wait = md_hz >> 1; - - raid_parent_cache = kmem_cache_create("md_raid_parent", - sizeof (md_raidps_t), 0, raid_parent_constructor, - raid_parent_destructor, raid_run_queue, NULL, NULL, 0); - raid_child_cache = kmem_cache_create("md_raid_child", - sizeof (md_raidcs_t) - sizeof (buf_t) + biosize(), 0, - raid_child_constructor, raid_child_destructor, - raid_run_queue, NULL, NULL, 0); - raid_cbuf_cache = kmem_cache_create("md_raid_cbufs", - sizeof (md_raidcbuf_t), 0, raid_cbuf_constructor, - raid_cbuf_destructor, raid_run_queue, NULL, NULL, 0); -} - -static void -fini_uninit() -{ - kmem_cache_destroy(raid_parent_cache); - kmem_cache_destroy(raid_child_cache); - kmem_cache_destroy(raid_cbuf_cache); - raid_parent_cache = raid_child_cache = raid_cbuf_cache = NULL; -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("raid module", init_init(), fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/raid/raid_hotspare.c b/usr/src/uts/common/io/lvm/raid/raid_hotspare.c deleted file mode 100644 index e3363750c181..000000000000 --- a/usr/src/uts/common/io/lvm/raid/raid_hotspare.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * NAME: raid_hotspare.c - * DESCRIPTION: RAID driver source file containing routines related to - * hospare operation. - * ROUTINES PROVIDED FOR EXTERNAL USE: - * raid_hs_release() - release a hotspare device - * raid_hotspares() - prompt the hospare daemon to attempt needed hotspare work - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern mdq_anchor_t md_hs_daemon; -static daemon_request_t hotspare_request; - -extern md_set_t md_set[]; -extern md_ops_t raid_md_ops; - -/* - * NAME: raid_hs_release - * - * DESCRIPTION: Release the hotspare. - * - * PARAMETERS: int error - indication of error on hotspare - * mr_unit_t *un - raid unit - * mddb_recid_t *recids - output records to commit revised hs info - * int hs_index - component to release - * - * LOCKS: Expects Unit Writer Lock to be held across call. - */ -void -raid_hs_release( - hs_cmds_t cmd, - mr_unit_t *un, - mddb_recid_t *recids, - int hs_index -) -{ - mr_column_t *col; - - col = &un->un_column[hs_index]; - - /* close the hotspare device */ - if (col->un_devflags & MD_RAID_DEV_ISOPEN) { - md_layered_close(col->un_dev, MD_OFLG_NULL); - col->un_devflags &= ~MD_RAID_DEV_ISOPEN; - } - - /* return the hotspare to the pool */ - (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, recids, - &col->un_hs_key, NULL, NULL); - - col->un_hs_pwstart = 0; - col->un_hs_devstart = 0; - col->un_hs_id = (mddb_recid_t)0; - col->un_hs_key = 0; -} - - -/* - * NAME: check_comp_4_hs - * - * DESCRIPTION: Check whether the input component has an error and can be - * backed with a hot spare (RCS_ERRED state), and initiate - * a resync if so. - * - * PARAMETERS: mr_unit_t *un - raid unit - * int hs_index - component to check - * - * LOCKS: Expects Unit Writer Lock to be held upon entrance. Releases - * the lock prior to calling raid_resync_unit, then reacquires - * it before returning. - */ -static void -check_comp_4_hs( - mr_unit_t *un, - int hs_index -) -{ - mddb_recid_t recids[3]; - minor_t mnum = MD_SID(un); - mdi_unit_t *ui; - rcs_state_t state; - diskaddr_t size; - int err; - mr_column_t *col; - md_error_t mde = mdnullerror; - char devname[MD_MAX_CTDLEN]; - char hs_devname[MD_MAX_CTDLEN]; - set_t setno; - md_dev64_t tmpdev; - diskaddr_t tmpdaddr; - - - /* initialize */ - setno = MD_UN2SET(un); - ui = MDI_UNIT(mnum); - md_unit_readerexit(ui); - (void) md_io_writerlock(ui); - un = (mr_unit_t *)md_unit_writerlock(ui); - col = &un->un_column[hs_index]; - - /* - * add a hotspare for erred column only if not resyncing - */ - if ((!(COLUMN_STATE(un, hs_index) & RCS_ERRED)) || - (raid_state_cnt(un, (RCS_ERRED | RCS_LAST_ERRED)) != 1) || - (raid_state_cnt(un, RCS_RESYNC) > 0)) { - goto errout; - } - - recids[0] = 0; - recids[1] = 0; - /* if there is already a hotspare then just return */ - if (HOTSPARED(un, hs_index) && (col->un_devstate & RCS_ERRED)) { - raid_hs_release(HS_BAD, un, &recids[0], hs_index); - cmn_err(CE_WARN, "md: %s: %s hotspare errored and released", - md_shortname(mnum), - md_devname(MD_MIN2SET(mnum), col->un_dev, NULL, 0)); - col->un_dev = col->un_orig_dev; - col->un_pwstart = col->un_orig_pwstart; - col->un_devstart = col->un_orig_devstart; - raid_commit(un, recids); - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_METADEVICE, - setno, MD_SID(un)); - } - ASSERT(!HOTSPARED(un, hs_index)); - - state = col->un_devstate; - size = col->un_pwstart + un->un_pwsize + - (un->un_segsize * un->un_segsincolumn); - -again: - /* quit if resync is already active */ - col->un_devflags |= MD_RAID_REGEN_RESYNC; - if (resync_request(mnum, hs_index, 0, NULL)) - goto errout; - - recids[0] = 0; - recids[1] = 0; - - tmpdev = col->un_dev; - tmpdaddr = col->un_hs_pwstart; - - /* get a hotspare */ - if (md_hot_spare_ifc(HS_GET, un->un_hsp_id, size, - ((col->un_orig_pwstart >= 1) && - (col->un_orig_pwstart != MD_DISKADDR_ERROR)), - &col->un_hs_id, &col->un_hs_key, &tmpdev, &tmpdaddr) != 0) { - col->un_dev = tmpdev; - col->un_hs_pwstart = tmpdaddr; - release_resync_request(mnum); - raid_set_state(un, hs_index, state, 1); - goto errout; - } - - col->un_hs_pwstart = tmpdaddr; - - /* - * record id is filled in by raid_commit, recids[0] filled in by - * md_hot_spare_ifc if needed - */ - recids[0] = col->un_hs_id; - recids[1] = 0; - - /* - * close the device and open the hot spare. The device should - * never be a hotspare here. - */ - if (col->un_devflags & MD_RAID_DEV_ISOPEN) { - md_layered_close(col->un_orig_dev, MD_OFLG_NULL); - col->un_devflags &= ~MD_RAID_DEV_ISOPEN; - } - /* - * Try open by device id - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, col->un_hs_key); - if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) { - md_dev64_t hs_dev = tmpdev; - /* cannot open return to orig */ - raid_hs_release(HS_BAD, un, &recids[0], hs_index); - release_resync_request(mnum); - raid_set_state(un, hs_index, state, 1); - col->un_dev = col->un_orig_dev; - col->un_devstart = col->un_orig_devstart; - col->un_pwstart = col->un_orig_pwstart; - col->un_devflags &= ~MD_RAID_DEV_ISOPEN; - raid_commit(un, recids); - cmn_err(CE_WARN, "md: %s: open error of hotspare %s", - md_shortname(mnum), - md_devname(MD_MIN2SET(mnum), hs_dev, NULL, 0)); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, setno, - MD_SID(un)); - goto again; - } - - col->un_dev = tmpdev; - - col->un_devflags |= MD_RAID_DEV_ISOPEN; - - /* - * move the values into the device fields. Since in some cases - * the pwstart is not zero this must be added into the start of - * the hotspare to avoid over writting the label - */ - col->un_hs_pwstart += col->un_orig_pwstart; - col->un_pwstart = col->un_hs_pwstart; - col->un_hs_devstart = col->un_hs_pwstart + un->un_pwsize; - col->un_devstart = col->un_hs_devstart; - - /* commit unit and hotspare records and release lock */ - raid_commit(un, recids); - md_unit_writerexit(ui); - md_io_writerexit(ui); - - err = raid_resync_unit(mnum, &mde); - - /* if resync fails, transition back to erred state and reset */ - if (err) { - /* reaquire unit writerr lock */ - un = (mr_unit_t *)md_unit_writerlock(ui); - - raid_set_state(un, hs_index, RCS_ERRED, 0); - - /* - * close the hotspare and return it. Then restore the - * original device back to the original state - */ - raid_hs_release(HS_FREE, un, &recids[0], hs_index); - col->un_dev = col->un_orig_dev; - col->un_devstart = col->un_orig_devstart; - col->un_pwstart = col->un_orig_pwstart; - raid_commit(un, recids); - md_unit_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - return; - } - - setno = MD_MIN2SET(mnum); - - (void) md_devname(setno, col->un_orig_dev, devname, - sizeof (devname)); - (void) md_devname(setno, col->un_dev, hs_devname, - sizeof (hs_devname)); - - cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s", - md_shortname(mnum), devname, hs_devname); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HOTSPARED, SVM_TAG_HS, setno, - MD_SID(un)); - (void) md_unit_readerlock(ui); - return; - -errout: - md_unit_writerexit(ui); - md_io_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); -} - -/* - * NAME: check_4_hs - * - * DESCRIPTION: Check every component of every raid unit for any device which - * needs to be backed with a hot spare. - * - * PARAMETERS: daemon_request_t *dr - hotspare request daemon - * - * LOCKS: Acquires and releases the Hotspare Request Lock and the RAID - * Driver Lock. Acquires the Unit Writer Lock which is released - * in check_comp_4_hs. - */ -static void -check_4_hs(daemon_request_t *dr) -{ - mdi_unit_t *ui; - mr_unit_t *un; - md_link_t *next; - int i; - - mutex_enter(&dr->dr_mx); /* clear up front so can poke */ - dr->dr_pending = 0; /* again in low level routine if */ - mutex_exit(&dr->dr_mx); /* something found to do */ - - /* - * Scan raid unit list and call component hotspare check routine for - * each component of each unit where resync is inactive. - */ - rw_enter(&raid_md_ops.md_link_rw.lock, RW_READER); - for (next = raid_md_ops.md_head; next != NULL; next = next->ln_next) { - ui = MDI_UNIT(next->ln_id); - un = (mr_unit_t *)md_unit_readerlock(ui); - if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE) && - (raid_state_cnt(un, RCS_RESYNC) == 0) && - (UNIT_STATE(un) & RUS_ERRED) && - (un->un_hsp_id != -1) && - (raid_state_cnt(un, RCS_ERRED) == 1)) { - for (i = 0; i < un->un_totalcolumncnt; i++) - if (un->un_column[i].un_devstate == RCS_ERRED) - check_comp_4_hs(un, i); - } - md_unit_readerexit(ui); - } - rw_exit(&raid_md_ops.md_link_rw.lock); -} - -/* - * NAME: raid_hotspares - * - * DESCRIPTION: Initiate a check of all RAID devices for components which - * may require a hot spare, if it is not already running. - * - * PARAMETERS: NONE - * - * LOCKS: Acquires and releases the Hotspare Request Lock. - */ -intptr_t -raid_hotspares() -{ - /* if available, make request for hotspare to master daemon */ - mutex_enter(&hotspare_request.dr_mx); - if (hotspare_request.dr_pending == 0) { - hotspare_request.dr_pending = 1; - daemon_request(&md_hs_daemon, - check_4_hs, (daemon_queue_t *)&hotspare_request, REQ_OLD); - } - mutex_exit(&hotspare_request.dr_mx); - return (0); -} diff --git a/usr/src/uts/common/io/lvm/raid/raid_ioctl.c b/usr/src/uts/common/io/lvm/raid/raid_ioctl.c deleted file mode 100644 index 3910d85c62a5..000000000000 --- a/usr/src/uts/common/io/lvm/raid/raid_ioctl.c +++ /dev/null @@ -1,3156 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright 2012 Milan Jurik. All rights reserved. - */ - -/* - * NAME: raid_ioctl.c - * - * DESCRIPTION: RAID driver source file containing IOCTL operations. - * - * ROUTINES PROVIDED FOR EXTERNAL USE: - * raid_commit() - commits MD database updates for a RAID metadevice - * md_raid_ioctl() - RAID metadevice IOCTL operations entry point. - * - * ROUTINES PROVIDED FOR INTERNAL USE: - * raid_getun() - Performs unit checking on a RAID metadevice - * init_col_nextio() - normal backend when zeroing column of RAID metadevice. - * init_col_int() - I/O interrupt while zeroing column of RAID metadevice. - * raid_init_columns() - Zero one or more columns of a RAID metadevice. - * raid_set() - used to create a RAID metadevice - * raid_get() - used to get the unit structure of a RAID metadevice - * raid_replace() - used to replace a component of a RAID metadevice - * raid_grow() - Concatenate to a RAID metadevice - * raid_change() - change dynamic values of a RAID metadevice - * raid_reset() - used to reset (clear / remove) a RAID metadevice - * raid_get_geom() - used to get the geometry of a RAID metadevice - * raid_get_vtoc() - used to get the VTOC on a RAID metadevice - * raid_set_vtoc() - used to set the VTOC on a RAID metadevice - * raid_get_extvtoc() - used to get the extended VTOC on a RAID metadevice - * raid_set_extvtoc() - used to set the extended VTOC on a RAID metadevice - * raid_getdevs() - return all devices within a RAID metadevice - * raid_admin_ioctl() - IOCTL operations unique to metadevices and RAID - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern int md_status; -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; -extern md_ops_t raid_md_ops; -extern major_t md_major; -extern md_krwlock_t md_unit_array_rw; -extern mdq_anchor_t md_done_daemon; -extern mdq_anchor_t md_ff_daemonq; -extern int mdopen(); -extern int mdclose(); -extern void md_probe_one(probe_req_t *); -extern int md_init_probereq(md_probedev_impl_t *, - daemon_queue_t **); -extern md_resync_t md_cpr_resync; - - -extern void dump_mr_unit(mr_unit_t *); - -typedef struct raid_ci { - DAEMON_QUEUE - struct raid_ci *ci_next; - mr_unit_t *ci_un; - int ci_col; - int ci_err; - int ci_flag; - size_t ci_zerosize; - diskaddr_t ci_blkno; - diskaddr_t ci_lastblk; - buf_t ci_buf; -} raid_ci_t; -/* values for the ci_flag */ -#define COL_INITING (0x0001) -#define COL_INIT_DONE (0x0002) -#define COL_READY (0x0004) - -/* - * NAME: raid_getun - * DESCRIPTION: performs a lot of unit checking on a RAID metadevice - * PARAMETERS: minor_t mnum - minor device number for RAID unit - * md_error_t *mde - pointer to error reporting structure - * int flags - pointer to error reporting structure - * STALE_OK - allow stale MD memory - * NO_OLD - unit must not exist - * NO_LOCK - no IOCTL lock needed - * WR_LOCK - write IOCTL lock needed - * RD_LOCK - read IOCTL lock needed - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: obtains unit reader or writer lock via IOLOCK - * - */ -static mr_unit_t * -raid_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock) -{ - mr_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { - (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); - return (NULL); - } - - if (!(flags & STALE_OK)) { - if (md_get_setstatus(setno) & MD_SET_STALE) { - (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); - return (NULL); - } - } - - ui = MDI_UNIT(mnum); - if (flags & NO_OLD) { - if (ui != NULL) { - (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum); - return (NULL); - } - return ((mr_unit_t *)1); - } - - if (ui == NULL) { - (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); - return (NULL); - } - if (flags & ARRAY_WRITER) - md_array_writer(lock); - else if (flags & ARRAY_READER) - md_array_reader(lock); - - if (!(flags & NO_LOCK)) { - if (flags & WR_LOCK) { - (void) md_ioctl_io_lock(lock, ui); - (void) md_ioctl_writerlock(lock, ui); - } else /* RD_LOCK */ - (void) md_ioctl_readerlock(lock, ui); - } - un = (mr_unit_t *)MD_UNIT(mnum); - - if (un->c.un_type != MD_METARAID) { - (void) mdmderror(mde, MDE_NOT_RAID, mnum); - return (NULL); - } - - return (un); -} - - -/* - * NAME: raid_commit - * DESCRIPTION: commits MD database updates for a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to update in the MD database - * mddb_recid_t *extras - array of other record IDs to update - * - * LOCKS: assumes caller holds unit writer lock - * - */ -void -raid_commit(mr_unit_t *un, mddb_recid_t *extras) -{ - mddb_recid_t *recids; - int ri = 0; - int nrecids = 0; - - if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) - return; - - /* Count the extra recids */ - if (extras != NULL) { - while (extras[nrecids] != 0) { - nrecids++; - } - } - - /* - * Allocate space for two recids in addition to the extras: - * one for the unit structure, one for the null terminator. - */ - nrecids += 2; - recids = (mddb_recid_t *) - kmem_zalloc(nrecids * sizeof (mddb_recid_t), KM_SLEEP); - - if (un != NULL) { - ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1); - recids[ri++] = un->c.un_record_id; - } - - if (extras != NULL) { - while (*extras != 0) { - recids[ri++] = *extras; - extras++; - } - } - - if (ri > 0) { - mddb_commitrecs_wrapper(recids); - } - - kmem_free(recids, nrecids * sizeof (mddb_recid_t)); -} - -static int -raid_check_pw(mr_unit_t *un) -{ - buf_t bp; - char *buf; - mr_column_t *colptr; - minor_t mnum = MD_SID(un); - int i; - int err = 0; - minor_t unit; - - buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP); - - for (i = 0; i < un->un_totalcolumncnt; i++) { - md_dev64_t tmpdev; - - colptr = &un->un_column[i]; - - tmpdev = colptr->un_dev; - /* - * Open by device id - * If this device is hotspared - * use the hotspare key - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ? - colptr->un_hs_key : colptr->un_orig_key); - if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) { - colptr->un_dev = tmpdev; - return (1); - } - colptr->un_dev = tmpdev; - - bzero((caddr_t)&bp, sizeof (buf_t)); - bp.b_back = &bp; - bp.b_forw = &bp; - bp.b_flags = B_READ | B_BUSY; - sema_init(&bp.b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bp.b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - bp.b_edev = md_dev64_to_dev(colptr->un_dev); - bp.b_lblkno = colptr->un_pwstart; - bp.b_bcount = DEV_BSIZE; - bp.b_bufsize = DEV_BSIZE; - bp.b_un.b_addr = (caddr_t)buf; - bp.b_offset = -1; - (void) md_call_strategy(&bp, 0, NULL); - if (biowait(&bp)) - err = 1; - if (i == 0) { - if (un->c.un_revision & MD_64BIT_META_DEV) { - unit = ((raid_pwhdr_t *)buf)->rpw_unit; - } else { - unit = ((raid_pwhdr32_od_t *)buf)->rpw_unit; - } - } - /* - * depending upon being an 64bit or 32 bit raid, the - * pre write headers have different layout - */ - if (un->c.un_revision & MD_64BIT_META_DEV) { - if ((((raid_pwhdr_t *)buf)->rpw_column != i) || - (((raid_pwhdr_t *)buf)->rpw_unit != unit)) - err = 1; - } else { - if ((((raid_pwhdr32_od_t *)buf)->rpw_column != i) || - (((raid_pwhdr32_od_t *)buf)->rpw_unit != unit)) - err = 1; - } - md_layered_close(colptr->un_dev, MD_OFLG_NULL); - if (err) - break; - } - kmem_free(buf, DEV_BSIZE); - return (err); -} - -/* - * NAME: init_col_nextio - * DESCRIPTION: normal backend process when zeroing column of a RAID metadevice. - * PARAMETERS: raid_ci_t *cur - struct for column being zeroed - * - * LOCKS: assumes caller holds unit reader lock, - * preiodically releases and reacquires unit reader lock, - * broadcasts on unit conditional variable (un_cv) - * - */ -#define INIT_RLS_CNT 10 -static void -init_col_nextio(raid_ci_t *cur) -{ - mr_unit_t *un; - - un = cur->ci_un; - - cur->ci_blkno += cur->ci_zerosize; - - mutex_enter(&un->un_mx); - /* ===> update un_percent_done */ - un->un_init_iocnt += btodb(cur->ci_buf.b_bcount); - mutex_exit(&un->un_mx); - - /* - * When gorwing a device, normal I/O is still going on. - * The init thread still holds the unit reader lock which - * prevents I/O from doing state changes. - * So every INIT_RLS_CNT init I/Os, we will release the - * unit reader lock. - * - * CAVEAT: - * We know we are in the middle of a grow operation and the - * unit cannot be grown or removed (through reset or halt) - * so the mr_unit_t structure will not move or disappear. - * In addition, we know that only one of the init I/Os - * can be in col_init_nextio at a time because they are - * placed on the md_done_daemon queue and md only processes - * one element of this queue at a time. In addition, any - * code that needs to acquire the unit writer lock to change - * state is supposed to be on the md_mstr_daemon queue so - * it can be processing while we sit here waiting to get the - * unit reader lock back. - */ - - if (cur->ci_blkno < cur->ci_lastblk) { - /* truncate last chunk to end_addr if needed */ - if (cur->ci_blkno + cur->ci_zerosize > cur->ci_lastblk) { - cur->ci_zerosize = (size_t) - (cur->ci_lastblk - cur->ci_blkno); - } - - /* set address and length for I/O bufs */ - cur->ci_buf.b_bufsize = dbtob(cur->ci_zerosize); - cur->ci_buf.b_bcount = dbtob(cur->ci_zerosize); - cur->ci_buf.b_lblkno = cur->ci_blkno; - - (void) md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL); - return; - } - /* finished initializing this column */ - mutex_enter(&un->un_mx); - cur->ci_flag = COL_INIT_DONE; - uniqtime32(&un->un_column[cur->ci_col].un_devtimestamp); - mutex_exit(&un->un_mx); - cv_broadcast(&un->un_cv); -} - -/* - * NAME: init_col_int - * DESCRIPTION: I/O interrupt while zeroing column of a RAID metadevice. - * PARAMETERS: buf_t *cb - I/O buffer for which interrupt occurred - * - * LOCKS: assumes caller holds unit reader or writer lock - * - */ -static int -init_col_int(buf_t *cb) -{ - raid_ci_t *cur; - - cur = (raid_ci_t *)cb->b_chain; - if (cb->b_flags & B_ERROR) { - mutex_enter(&cur->ci_un->un_mx); - cur->ci_err = EIO; - mutex_exit(&cur->ci_un->un_mx); - cv_broadcast(&cur->ci_un->un_cv); - return (1); - } - daemon_request(&md_done_daemon, init_col_nextio, - (daemon_queue_t *)cur, REQ_OLD); - return (1); -} - -/* - * NAME: raid_init_columns - * DESCRIPTION: Zero one or more columns of a RAID metadevice. - * PARAMETERS: minor_t mnum - RAID unit minor identifier - * - * LOCKS: obtains and releases unit reader lock, - * obtains and releases unit writer lock, - * obtains and releases md_unit_array_rw write lock, - * obtains and releases unit mutex (un_mx) lock, - * waits on unit conditional variable (un_cv) - * - */ -static void -raid_init_columns(minor_t mnum) -{ - mr_unit_t *un; - mdi_unit_t *ui; - raid_ci_t *ci_chain = NULL, *cur; - rus_state_t state; - caddr_t zero_addr; - diskaddr_t end_off; - size_t zerosize; - int err = 0; - int ix; - int colcnt = 0; - int col; - set_t setno = MD_MIN2SET(mnum); - - /* - * Increment the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync++; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - /* - * initialization is a multiple step process. The first step - * is to go through the unit structure and start each device - * in the init state writing zeros over the component. - * Next initialize the prewrite areas, so the device can be - * used if a metainit -k is done. Now close the componenets. - * - * Once this complete set the state of each component being - * zeroed and set the correct state for the unit. - * - * last commit the records. - */ - - ui = MDI_UNIT(mnum); - un = md_unit_readerlock(ui); - - /* check for active init on this column */ - /* exiting is cpr safe */ - if ((un->un_init_colcnt > 0) && (un->un_resync_index != -1)) { - md_unit_readerexit(ui); - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - /* - * Decrement the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync--; - mutex_exit(&md_cpr_resync.md_resync_mutex); - thread_exit(); - } - - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_START, SVM_TAG_METADEVICE, setno, - MD_SID(un)); - un->un_init_colcnt = 0; - un->un_init_iocnt = 0; - end_off = un->un_pwsize + (un->un_segsize * un->un_segsincolumn); - zerosize = (size_t)MIN((diskaddr_t)un->un_maxio, end_off); - - /* allocate zero-filled buffer */ - zero_addr = kmem_zalloc(dbtob(zerosize), KM_SLEEP); - - for (ix = 0; ix < un->un_totalcolumncnt; ix++) { - if (un->un_column[ix].un_devstate != RCS_INIT) - continue; - /* allocate new column init structure */ - cur = (raid_ci_t *)kmem_zalloc((sizeof (raid_ci_t)), KM_SLEEP); - ASSERT(cur != NULL); - un->un_init_colcnt++; - cur->ci_next = ci_chain; - ci_chain = cur; - cur->ci_un = un; - cur->ci_col = ix; - cur->ci_err = 0; - cur->ci_flag = COL_INITING; - cur->ci_zerosize = zerosize; - cur->ci_blkno = un->un_column[ix].un_pwstart; - cur->ci_lastblk = cur->ci_blkno + un->un_pwsize - + (un->un_segsize * un->un_segsincolumn); - /* initialize static buf fields */ - cur->ci_buf.b_un.b_addr = zero_addr; - cur->ci_buf.b_chain = (buf_t *)cur; - cur->ci_buf.b_back = &cur->ci_buf; - cur->ci_buf.b_forw = &cur->ci_buf; - cur->ci_buf.b_iodone = init_col_int; - cur->ci_buf.b_flags = B_BUSY | B_WRITE; - cur->ci_buf.b_edev = md_dev64_to_dev(un->un_column[ix].un_dev); - sema_init(&cur->ci_buf.b_io, 0, NULL, SEMA_DEFAULT, NULL); - sema_init(&cur->ci_buf.b_sem, 0, NULL, SEMA_DEFAULT, NULL); - /* set address and length for I/O bufs */ - cur->ci_buf.b_bufsize = dbtob(zerosize); - cur->ci_buf.b_bcount = dbtob(zerosize); - cur->ci_buf.b_lblkno = un->un_column[ix].un_pwstart; - cur->ci_buf.b_offset = -1; - - if (! (un->un_column[ix].un_devflags & MD_RAID_DEV_ISOPEN)) { - md_dev64_t tmpdev = un->un_column[ix].un_dev; - /* - * Open by device id - * If this column is hotspared then - * use the hotspare key - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, - HOTSPARED(un, ix) ? - un->un_column[ix].un_hs_key : - un->un_column[ix].un_orig_key); - if ((cur->ci_err = md_layered_open(mnum, &tmpdev, - MD_OFLG_NULL)) == 0) - un->un_column[ix].un_devflags |= - MD_RAID_DEV_ISOPEN; - un->un_column[ix].un_dev = tmpdev; - } - if (cur->ci_err == 0) - md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL); - } - - md_unit_readerexit(ui); - state = un->un_state; - colcnt = un->un_init_colcnt; - mutex_enter(&un->un_mx); - while (colcnt) { - cv_wait(&un->un_cv, &un->un_mx); - - colcnt = 0; - for (cur = ci_chain; cur != NULL; cur = cur->ci_next) { - col = cur->ci_col; - if ((cur->ci_flag != COL_INITING) || (cur->ci_err)) { - if (cur->ci_err) - err = cur->ci_err; - else if (cur->ci_flag == COL_INIT_DONE) { - (void) init_pw_area(un, - un->un_column[col].un_dev, - un->un_column[col].un_pwstart, - col); - cur->ci_flag = COL_READY; - } - } else { - colcnt++; - } - } - } - mutex_exit(&un->un_mx); - - /* This prevents new opens */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - (void) md_io_writerlock(ui); - un = (mr_unit_t *)md_unit_writerlock(ui); - while (ci_chain) { - cur = ci_chain; - - /* take this element out of the chain */ - ci_chain = cur->ci_next; - /* free this element */ - sema_destroy(&cur->ci_buf.b_io); - sema_destroy(&cur->ci_buf.b_sem); - if (cur->ci_err) - raid_set_state(cur->ci_un, cur->ci_col, - RCS_INIT_ERRED, 0); - else - raid_set_state(cur->ci_un, cur->ci_col, - RCS_OKAY, 0); - kmem_free(cur, sizeof (raid_ci_t)); - } - - /* free the zeroed buffer */ - kmem_free(zero_addr, dbtob(zerosize)); - - /* determine new unit state */ - if (err == 0) { - if (state == RUS_INIT) - un->un_state = RUS_OKAY; - else { - un->c.un_total_blocks = un->un_grow_tb; - md_nblocks_set(mnum, un->c.un_total_blocks); - un->un_grow_tb = 0; - if (raid_state_cnt(un, RCS_OKAY) == - un->un_totalcolumncnt) - un->un_state = RUS_OKAY; - } - } else { /* error orcurred */ - if (state & RUS_INIT) - un->un_state = RUS_DOI; - } - uniqtime32(&un->un_timestamp); - MD_STATUS(un) &= ~MD_UN_GROW_PENDING; - un->un_init_colcnt = 0; - un->un_init_iocnt = 0; - raid_commit(un, NULL); - md_unit_writerexit(ui); - (void) md_io_writerexit(ui); - rw_exit(&md_unit_array_rw.lock); - if (err) { - if (un->un_state & RUS_DOI) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_SUCCESS, - SVM_TAG_METADEVICE, setno, MD_SID(un)); - } - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - /* - * Decrement the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync--; - mutex_exit(&md_cpr_resync.md_resync_mutex); - thread_exit(); - /*NOTREACHED*/ -} - -static int -raid_init_unit(minor_t mnum, md_error_t *ep) -{ - mdi_unit_t *ui; - mr_unit_t *un; - int rval, i; - set_t setno = MD_MIN2SET(mnum); - - ui = MDI_UNIT(mnum); - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(ep, MDE_DB_STALE, mnum, setno)); - - /* Don't start an init if the device is not available */ - if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) { - return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum)); - } - - if (raid_internal_open(mnum, (FREAD | FWRITE), - OTYP_LYR, MD_OFLG_ISINIT)) { - rval = mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum); - goto out; - } - - un = md_unit_readerlock(ui); - un->un_percent_done = 0; - md_unit_readerexit(ui); - /* start resync_unit thread */ - (void) thread_create(NULL, 0, raid_init_columns, - (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri); - - return (0); - -out: - un = md_unit_writerlock(ui); - MD_STATUS(un) &= ~MD_UN_GROW_PENDING; - /* recover state */ - for (i = 0; i < un->un_totalcolumncnt; i++) - if (COLUMN_STATE(un, i) == RCS_INIT) - raid_set_state(un, i, RCS_ERRED, 0); - if (un->un_state & RUS_INIT) - un->un_state = RUS_DOI; - raid_commit(un, NULL); - md_unit_writerexit(ui); - if (un->un_state & RUS_DOI) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } - return (rval); -} - -/* - * NAME: raid_regen - * - * DESCRIPTION: regenerate all the parity on the raid device. This - * routine starts a thread that will regenerate the - * parity on a raid device. If an I/O error occurs during - * this process the entire device is placed in error. - * - * PARAMETERS: md_set_params_t *msp - ioctl packet - */ -static void -regen_unit(minor_t mnum) -{ - mdi_unit_t *ui = MDI_UNIT(mnum); - mr_unit_t *un = MD_UNIT(mnum); - buf_t buf, *bp; - caddr_t buffer; - int err = 0; - diskaddr_t total_segments; - diskaddr_t line; - size_t iosize; - - /* - * Increment raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync++; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - iosize = dbtob(un->un_segsize); - buffer = kmem_alloc(iosize, KM_SLEEP); - bp = &buf; - total_segments = un->un_segsincolumn; - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_START, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - un->un_percent_done = 0; - init_buf(bp, B_READ | B_BUSY, iosize); - - for (line = 0; line < total_segments; line++) { - bp->b_lblkno = line * - ((un->un_origcolumncnt - 1) * un->un_segsize); - bp->b_un.b_addr = buffer; - bp->b_bcount = iosize; - bp->b_iodone = NULL; - /* - * The following assignment is only correct because - * md_raid_strategy is fine when it's only a minor number - * and not a real dev_t. Yuck. - */ - bp->b_edev = mnum; - md_raid_strategy(bp, MD_STR_NOTTOP, NULL); - if (biowait(bp)) { - err = 1; - break; - } - un->un_percent_done = (uint_t)((line * 1000) / - un->un_segsincolumn); - /* just to avoid rounding errors */ - if (un->un_percent_done > 1000) - un->un_percent_done = 1000; - reset_buf(bp, B_READ | B_BUSY, iosize); - } - destroy_buf(bp); - kmem_free(buffer, iosize); - - (void) md_io_writerlock(ui); - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - (void) md_io_writerexit(ui); - un = md_unit_writerlock(ui); - if (!err && - (raid_state_cnt(un, RCS_OKAY) == un->un_totalcolumncnt)) - un->un_state = RUS_OKAY; - raid_commit(un, NULL); - md_unit_writerexit(ui); - if (err || - raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_DONE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - /* - * Decrement the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync--; - mutex_exit(&md_cpr_resync.md_resync_mutex); - thread_exit(); -} - -static int -raid_regen_unit(minor_t mnum, md_error_t *ep) -{ - mdi_unit_t *ui; - mr_unit_t *un; - int i; - set_t setno = MD_MIN2SET(mnum); - - ui = MDI_UNIT(mnum); - un = (mr_unit_t *)MD_UNIT(mnum); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(ep, MDE_DB_STALE, mnum, setno)); - - /* Don't start a regen if the device is not available */ - if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) { - return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum)); - } - - if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) { - (void) md_unit_writerlock(ui); - for (i = 0; i < un->un_totalcolumncnt; i++) - raid_set_state(un, i, RCS_ERRED, 0); - md_unit_writerexit(ui); - return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum)); - } - - /* start resync_unit thread */ - (void) thread_create(NULL, 0, regen_unit, - (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri); - - return (0); -} - -static int -raid_regen(md_regen_param_t *mrp, IOLOCK *lock) -{ - minor_t mnum = mrp->mnum; - mr_unit_t *un; - - mdclrerror(&mrp->mde); - - un = md_unit_readerlock(MDI_UNIT(mnum)); - - if (MD_STATUS(un) & MD_UN_GROW_PENDING) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(&mrp->mde, MDE_IN_USE, mnum)); - } - - if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) || - (raid_state_cnt(un, RCS_RESYNC))) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(&mrp->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(&mrp->mde, MDE_IN_USE, mnum)); - } - - if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) || - (! (un->un_state & RUS_OKAY))) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum)); - } - - md_unit_readerexit(MDI_UNIT(mnum)); - - /* get locks and recheck to be sure something did not change */ - if ((un = raid_getun(mnum, &mrp->mde, WRITERS, lock)) == NULL) - return (0); - - if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) || - (! (un->un_state & RUS_OKAY))) { - return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum)); - } - - raid_set_state(un, 0, RCS_REGEN, 0); - raid_commit(un, NULL); - md_ioctl_droplocks(lock); - return (raid_regen_unit(mnum, &mrp->mde)); -} - -/* - * NAME: raid_set - * DESCRIPTION: used to create a RAID metadevice - * PARAMETERS: md_set_params_t *d - pointer to set data structure - * int mode - must be FWRITE - * - * LOCKS: none - * - */ -static int -raid_set(void *d, int mode) -{ - minor_t mnum; - mr_unit_t *un; - mddb_recid_t mr_recid; - mddb_recid_t *recids; - mddb_type_t typ1; - int err; - set_t setno; - int num_recs; - int rid; - int col; - md_set_params_t *msp = d; - - - mnum = msp->mnum; - setno = MD_MIN2SET(mnum); - - mdclrerror(&msp->mde); - - if (raid_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL) - return (0); - - typ1 = (mddb_type_t)md_getshared_key(setno, - raid_md_ops.md_driver.md_drivername); - - /* create the db record for this mdstruct */ - - if (msp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum)); -#else - mr_recid = mddb_createrec(msp->size, typ1, 0, - MD_CRO_64BIT | MD_CRO_RAID | MD_CRO_FN, setno); -#endif - } else { - mr_recid = mddb_createrec(msp->size, typ1, 0, - MD_CRO_32BIT | MD_CRO_RAID | MD_CRO_FN, setno); - } - - if (mr_recid < 0) - return (mddbstatus2error(&msp->mde, - (int)mr_recid, mnum, setno)); - - /* get the address of the mdstruct */ - un = (mr_unit_t *)mddb_getrecaddr(mr_recid); - /* - * It is okay that we muck with the mdstruct here, - * since no one else will know about the mdstruct - * until we commit it. If we crash, the record will - * be automatically purged, since we haven't - * committed it yet. - */ - - /* copy in the user's mdstruct */ - if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un, - msp->size, mode)) { - mddb_deleterec_wrapper(mr_recid); - return (EFAULT); - } - /* All 64 bit metadevices only support EFI labels. */ - if (msp->options & MD_CRO_64BIT) { - un->c.un_flag |= MD_EFILABEL; - } - - /* - * allocate the real recids array. since we may have to commit - * underlying metadevice records, we need an array of size: - * total number of components in raid + 3 (1 for the raid itself, - * one for the hotspare, one for the end marker). - */ - num_recs = un->un_totalcolumncnt + 3; - rid = 0; - recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); - recids[rid++] = mr_recid; - - MD_SID(un) = mnum; - MD_RECID(un) = recids[0]; - MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SP; - MD_PARENT(un) = MD_NO_PARENT; - un->un_resync_copysize = 0; - un->c.un_revision |= MD_FN_META_DEV; - - if (UNIT_STATE(un) == RUS_INIT) - MD_STATUS(un) |= MD_UN_GROW_PENDING; - - if ((UNIT_STATE(un) != RUS_INIT) && raid_check_pw(un)) { - mddb_deleterec_wrapper(mr_recid); - err = mderror(&msp->mde, MDE_RAID_INVALID); - goto out; - } - - if (err = raid_build_incore(un, 0)) { - if (un->mr_ic) { - kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * - un->un_totalcolumncnt); - kmem_free(un->mr_ic, sizeof (*un->mr_ic)); - } - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(mr_recid); - goto out; - } - - /* - * Update unit availability - */ - md_set[setno].s_un_avail--; - - recids[rid] = 0; - if (un->un_hsp_id != -1) { - /* increment the reference count of the hot spare pool */ - err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0, - &recids[rid], NULL, NULL, NULL); - if (err) { - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(mr_recid); - goto out; - } - rid++; - } - - /* - * set the parent on any metadevice components. - * NOTE: currently soft partitions are the only metadevices - * which can appear within a RAID metadevice. - */ - for (col = 0; col < un->un_totalcolumncnt; col++) { - mr_column_t *mr_col = &un->un_column[col]; - md_unit_t *comp_un; - - if (md_getmajor(mr_col->un_dev) == md_major) { - comp_un = MD_UNIT(md_getminor(mr_col->un_dev)); - recids[rid++] = MD_RECID(comp_un); - md_set_parent(mr_col->un_dev, MD_SID(un)); - } - } - - /* set the end marker */ - recids[rid] = 0; - - mddb_commitrecs_wrapper(recids); - md_create_unit_incore(mnum, &raid_md_ops, 1); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno, - MD_SID(un)); - -out: - kmem_free(recids, (num_recs * sizeof (mddb_recid_t))); - if (err) - return (err); - - /* only attempt to init a device that is in the init state */ - if (UNIT_STATE(un) != RUS_INIT) - return (0); - - return (raid_init_unit(mnum, &msp->mde)); -} - -/* - * NAME: raid_get - * DESCRIPTION: used to get the unit structure of a RAID metadevice - * PARAMETERS: md_i_get_t *migp - pointer to get data structure - * int mode - must be FREAD - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: obtains unit reader lock via IOLOCK - * - */ -static int -raid_get( - void *migp, - int mode, - IOLOCK *lock -) -{ - minor_t mnum; - mr_unit_t *un; - md_i_get_t *migph = migp; - - - mnum = migph->id; - - mdclrerror(&migph->mde); - - if ((un = raid_getun(mnum, &migph->mde, - RD_LOCK, lock)) == NULL) - return (0); - - if (migph->size == 0) { - migph->size = un->c.un_size; - return (0); - } - - if (migph->size < un->c.un_size) { - return (EFAULT); - } - if (ddi_copyout(un, (void *)(uintptr_t)migph->mdp, - un->c.un_size, mode)) - return (EFAULT); - - return (0); -} - - -/* - * NAME: raid_replace - * DESCRIPTION: used to replace a component of a RAID metadevice - * PARAMETERS: replace_params_t *mrp - pointer to replace data structure - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun), - * obtains and releases md_unit_array_rw write lock - * - */ -static int -raid_replace( - replace_params_t *mrp, - IOLOCK *lock -) -{ - minor_t mnum = mrp->mnum; - md_dev64_t odev = mrp->old_dev; - md_error_t *ep = &mrp->mde; - mr_unit_t *un; - rcs_state_t state; - int ix, col = -1; - int force = 0; - int err = 0; - replace_cmd_t cmd; - set_t setno; - side_t side; - mdkey_t devkey; - int nkeys; - mddb_recid_t extra_recids[3] = { 0, 0, 0 }; - int extra_rids = 0; - md_error_t mde = mdnullerror; - sv_dev_t sv = {MD_SET_BAD, MD_SIDEWILD, MD_KEYWILD}; - - mdclrerror(ep); - setno = MD_MIN2SET(mnum); - side = mddb_getsidenum(setno); - - un = md_unit_readerlock(MDI_UNIT(mnum)); - - if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) || - (raid_state_cnt(un, RCS_RESYNC) != 0)) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum)); - } - - if (un->un_state & RUS_DOI) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(ep, MDE_RAID_DOI, mnum)); - } - - if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) || - (MD_STATUS(un) & MD_UN_GROW_PENDING)) { - md_unit_readerexit(MDI_UNIT(mnum)); - return (mdmderror(ep, MDE_IN_USE, mnum)); - } - - md_unit_readerexit(MDI_UNIT(mnum)); - - /* get locks and recheck to be sure something did not change */ - if ((un = raid_getun(mnum, ep, WRITERS, lock)) == NULL) - return (0); - - if (md_getkeyfromdev(setno, side, odev, &devkey, &nkeys) != 0) { - return (mddeverror(ep, MDE_NAME_SPACE, odev)); - } - - for (ix = 0; ix < un->un_totalcolumncnt; ix++) { - md_dev64_t tmpdevt = un->un_column[ix].un_orig_dev; - /* - * Try to resolve devt again if NODEV64 - */ - if (tmpdevt == NODEV64) { - tmpdevt = md_resolve_bydevid(mnum, tmpdevt, - un->un_column[ix].un_orig_key); - un->un_column[ix].un_orig_dev = tmpdevt; - } - - if (un->un_column[ix].un_orig_dev == odev) { - col = ix; - break; - } else { - if (un->un_column[ix].un_orig_dev == NODEV64) { - /* - * Now we use the keys to match. - * If no key found, continue. - */ - if (nkeys == 0) { - continue; - } - if (un->un_column[ix].un_orig_key == devkey) { - if (nkeys > 1) - return (mddeverror(ep, - MDE_MULTNM, odev)); - col = ix; - break; - } - } - } - } - - if (col == -1) - return (mdcomperror(ep, MDE_CANT_FIND_COMP, - mnum, odev)); - - if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) || - (raid_state_cnt(un, RCS_RESYNC) != 0)) - return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum)); - - if (un->un_state & RUS_DOI) - return (mdcomperror(ep, MDE_REPL_INVAL_STATE, mnum, - un->un_column[col].un_dev)); - - if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) || - (MD_STATUS(un) & MD_UN_GROW_PENDING)) - return (mdmderror(ep, MDE_IN_USE, mnum)); - - if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == FORCE_REPLACE_COMP)) - force = 1; - if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == ENABLE_COMP)) - cmd = ENABLE_COMP; - if ((mrp->cmd == FORCE_REPLACE_COMP) || (mrp->cmd == REPLACE_COMP)) - cmd = REPLACE_COMP; - - if (un->un_state == RUS_LAST_ERRED) { - /* Must use -f force flag for unit in LAST_ERRED state */ - if (!force) - return (mdmderror(ep, MDE_RAID_NEED_FORCE, mnum)); - - /* Must use -f force flag on ERRED column first */ - if (un->un_column[col].un_devstate != RCS_ERRED) { - for (ix = 0; ix < un->un_totalcolumncnt; ix++) { - if (un->un_column[ix].un_devstate & RCS_ERRED) - return (mdcomperror(ep, - MDE_RAID_COMP_ERRED, mnum, - un->un_column[ix].un_dev)); - } - } - - /* must use -f force flag on LAST_ERRED columns next */ - if ((un->un_column[col].un_devstate != RCS_LAST_ERRED) && - (un->un_column[col].un_devstate != RCS_ERRED)) - return (mdcomperror(ep, MDE_RAID_COMP_ERRED, - mnum, un->un_column[col].un_dev)); - } - - if (un->un_state == RUS_ERRED) { - if (! (un->un_column[col].un_devstate & - (RCS_ERRED | RCS_INIT_ERRED))) - return (mdcomperror(ep, MDE_RAID_COMP_ERRED, - mnum, un->un_column[ix].un_dev)); - } - - ASSERT(!(un->un_column[col].un_devflags & MD_RAID_ALT_ISOPEN)); - ASSERT(!(un->un_column[col].un_devflags & MD_RAID_WRITE_ALT)); - - state = un->un_column[col].un_devstate; - if (state & RCS_INIT_ERRED) { - MD_STATUS(un) |= MD_UN_GROW_PENDING; - un->un_percent_done = 0; - raid_set_state(un, col, RCS_INIT, 0); - } else if (((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) && - resync_request(mnum, col, 0, ep)) - return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum)); - - - if (cmd == REPLACE_COMP) { - md_dev64_t tmpdev = mrp->new_dev; - - /* - * open the device by device id - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, mrp->new_key); - if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) { - return (mdcomperror(ep, MDE_COMP_OPEN_ERR, mnum, - tmpdev)); - } - - /* - * If it's a metadevice, make sure it gets reparented - */ - if (md_getmajor(tmpdev) == md_major) { - minor_t new_mnum = md_getminor(tmpdev); - md_unit_t *new_un = MD_UNIT(new_mnum); - - md_set_parent(tmpdev, MD_SID(un)); - extra_recids[extra_rids++] = MD_RECID(new_un); - } - - mrp->new_dev = tmpdev; - un->un_column[col].un_orig_dev = tmpdev; - un->un_column[col].un_orig_key = mrp->new_key; - un->un_column[col].un_orig_pwstart = mrp->start_blk; - un->un_column[col].un_orig_devstart = - mrp->start_blk + un->un_pwsize; - - /* - * If the old device was a metadevice, make sure to - * reset its parent. - */ - if (md_getmajor(odev) == md_major) { - minor_t old_mnum = md_getminor(odev); - md_unit_t *old_un = MD_UNIT(old_mnum); - - md_reset_parent(odev); - extra_recids[extra_rids++] = - MD_RECID(old_un); - } - - if (HOTSPARED(un, col)) { - md_layered_close(mrp->new_dev, MD_OFLG_NULL); - un->un_column[col].un_alt_dev = mrp->new_dev; - un->un_column[col].un_alt_pwstart = mrp->start_blk; - un->un_column[col].un_alt_devstart = - mrp->start_blk + un->un_pwsize; - un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC; - } else { - /* - * not hot spared. Close the old device and - * move the new device in. - */ - if (un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) - md_layered_close(odev, MD_OFLG_NULL); - un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN; - un->un_column[col].un_dev = mrp->new_dev; - un->un_column[col].un_pwstart = mrp->start_blk; - un->un_column[col].un_devstart = - mrp->start_blk + un->un_pwsize; - if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) { - un->un_column[col].un_devflags |= - MD_RAID_REGEN_RESYNC; - } - } - /* - * If the old device is not a metadevice then - * save off the set number and key so that it - * can be removed from the namespace later. - */ - if (md_getmajor(odev) != md_major) { - sv.setno = setno; - sv.key = devkey; - } - } - - if (cmd == ENABLE_COMP) { - md_dev64_t tmpdev = un->un_column[col].un_orig_dev; - mdkey_t raidkey = un->un_column[col].un_orig_key; - - /* - * We trust the dev_t because we cannot determine the - * dev_t from the device id since a new disk is in the - * same location. Since this is a call from metareplace -e dx - * AND it is SCSI a new dev_t is not generated. So the - * dev_t from the mddb is used. Before enabling the device - * we check to make sure that multiple entries for the same - * device does not exist in the namespace. If they do we - * fail the ioctl. - * One of the many ways multiple entries in the name space - * can occur is if one removed the failed component in a - * RAID metadevice and put another disk that was part of - * another metadevice. After reboot metadevadm would correctly - * update the device name for the metadevice whose component - * has moved. However now in the metadb there are two entries - * for the same name (ctds) that belong to different - * metadevices. One is valid, the other is a ghost or "last - * know as" ctds. - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, raidkey); - if (tmpdev == NODEV64) - tmpdev = md_getdevnum(setno, side, raidkey, - MD_TRUST_DEVT); - /* - * check for multiple entries in namespace for the - * same dev - */ - - if (md_getkeyfromdev(setno, side, tmpdev, &devkey, - &nkeys) != 0) - return (mddeverror(ep, MDE_NAME_SPACE, tmpdev)); - /* - * If number of keys are greater that - * 1, then we have an invalid - * namespace. STOP and return. - */ - if (nkeys > 1) - return (mddeverror(ep, MDE_MULTNM, tmpdev)); - if (devkey != raidkey) - return (mdcomperror(ep, MDE_CANT_FIND_COMP, - mnum, tmpdev)); - - if (un->un_column[col].un_orig_dev == NODEV64) - un->un_column[col].un_orig_dev = tmpdev; - - if (HOTSPARED(un, col)) { - un->un_column[col].un_alt_dev = - un->un_column[col].un_orig_dev; - un->un_column[col].un_alt_pwstart = - un->un_column[col].un_orig_pwstart; - un->un_column[col].un_alt_devstart = - un->un_column[col].un_orig_devstart; - un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC; - } else { - if (!(un->un_column[col].un_devflags & - MD_RAID_DEV_ISOPEN)) { - if (md_layered_open(mnum, &tmpdev, - MD_OFLG_NULL)) { - un->un_column[col].un_dev = tmpdev; - return (mdcomperror(ep, - MDE_COMP_OPEN_ERR, mnum, tmpdev)); - } - ASSERT(tmpdev != NODEV64 && - tmpdev != 0); - - if ((md_getmajor(tmpdev) != md_major) && - (md_devid_found(setno, side, raidkey) - == 1)) { - if (md_update_namespace_did(setno, side, - raidkey, &mde) != 0) { - cmn_err(CE_WARN, - "md: could not" - " update namespace\n"); - } - } - un->un_column[col].un_dev = - un->un_column[col].un_orig_dev; - } - un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN; - un->un_column[col].un_devflags |= MD_RAID_REGEN_RESYNC; - } - } - if (mrp->has_label) { - un->un_column[col].un_devflags |= MD_RAID_HAS_LABEL; - } else { - un->un_column[col].un_devflags &= ~MD_RAID_HAS_LABEL; - } - - raid_commit(un, extra_recids); - - /* If the component has been replaced - clean up the name space */ - if (sv.setno != MD_SET_BAD) { - md_rem_names(&sv, 1); - } - - md_ioctl_droplocks(lock); - - if ((cmd == ENABLE_COMP) || (cmd == FORCE_ENABLE_COMP)) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE, - setno, MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE, - setno, MD_SID(un)); - } - - if (un->un_column[col].un_devstate & RCS_INIT) - err = raid_init_unit(mnum, ep); - else if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) - err = raid_resync_unit(mnum, ep); - - mdclrerror(ep); - if (!err) - return (0); - - /* be sure state */ - /* is already set by this time */ - /* fix state and commit record */ - un = md_unit_writerlock(MDI_UNIT(mnum)); - if (state & RCS_INIT_ERRED) - raid_set_state(un, col, state, 1); - else if (state & RCS_OKAY) - raid_set_state(un, col, RCS_ERRED, 0); - else - raid_set_state(un, col, state, 1); - raid_commit(un, NULL); - md_unit_writerexit(MDI_UNIT(mnum)); - mdclrerror(ep); - return (0); -} - - -/* - * NAME: raid_set_sync - * DESCRIPTION: used to sync a component of a RAID metadevice - * PARAMETERS: md_resync_ioctl_t *mrp - pointer to resync data structure - * int mode - must be FWRITE - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun), - * obtains and releases md_unit_array_rw write lock - * - */ -static int -raid_set_sync( - md_resync_ioctl_t *rip, - IOLOCK *lock -) -{ - minor_t mnum = rip->ri_mnum; - mr_unit_t *un; - int init = 0; - int resync = 0; - int regen = 0; - int ix; - int err; - - mdclrerror(&rip->mde); - - if ((un = raid_getun(mnum, &rip->mde, WRITERS, lock)) == NULL) - return (0); - - if (un->un_state & RUS_DOI) - return (mdmderror(&rip->mde, MDE_RAID_DOI, mnum)); - - if (un->c.un_status & MD_UN_RESYNC_ACTIVE) - return (mdmderror(&rip->mde, MDE_RESYNC_ACTIVE, mnum)); - - /* This prevents new opens */ - - rip->ri_flags = 0; - if (un->un_state & RUS_REGEN) - regen++; - - if (raid_state_cnt(un, RCS_RESYNC)) - resync++; - - if (raid_state_cnt(un, RCS_INIT) || (un->un_state & RUS_INIT)) - init++; - - ASSERT(!(resync && init && regen)); - md_ioctl_droplocks(lock); - rip->ri_percent_done = 0; - - if (init) { - MD_STATUS(un) |= MD_UN_GROW_PENDING; - return (raid_init_unit(mnum, &rip->mde)); - } - - /* - * If resync is needed, it will call raid_internal_open forcing - * replay before the open completes. - * Otherwise, call raid_internal_open directly to force - * replay to complete during boot (metasync -r). - * NOTE: the unit writer lock must remain held while setting - * MD_UN_RESYNC_ACTIVE but must be released before - * calling raid_resync_unit or raid_internal_open. - */ - if (resync) { - ASSERT(resync < 2); - un = md_unit_writerlock(MDI_UNIT(mnum)); - MD_STATUS(un) |= MD_UN_RESYNC_ACTIVE; - /* Must release unit writer lock for resync */ - /* - * correctly setup the devices before trying to start the - * resync operation. - */ - for (ix = 0; un->un_totalcolumncnt; ix++) { - if (un->un_column[ix].un_devstate & RCS_RESYNC) { - if ((un->un_column[ix].un_devflags & - MD_RAID_COPY_RESYNC) && - HOTSPARED(un, ix)) { - un->un_column[ix].un_alt_dev = - un->un_column[ix].un_orig_dev; - un->un_column[ix].un_alt_devstart = - un->un_column[ix].un_orig_devstart; - un->un_column[ix].un_alt_pwstart = - un->un_column[ix].un_orig_pwstart; - } - break; - } - } - ASSERT(un->un_column[ix].un_devflags & - (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC)); - rip->ri_percent_done = 0; - un->un_column[ix].un_devflags |= MD_RAID_RESYNC; - (void) resync_request(mnum, ix, 0, NULL); - md_unit_writerexit(MDI_UNIT(mnum)); - err = raid_resync_unit(mnum, &rip->mde); - return (err); - } - - if (regen) { - err = raid_regen_unit(mnum, &rip->mde); - return (err); - } - - /* The unit requires not work so just force replay of the device */ - if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) - return (mdmderror(&rip->mde, - MDE_RAID_OPEN_FAILURE, mnum)); - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - - return (0); -} - -/* - * NAME: raid_get_resync - * DESCRIPTION: used to check resync status on a component of a RAID metadevice - * PARAMETERS: md_resync_ioctl_t *mrp - pointer to resync data structure - * int mode - must be FWRITE - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: none - * - */ -static int -raid_get_resync( - md_resync_ioctl_t *rip, - IOLOCK *lock -) -{ - minor_t mnum = rip->ri_mnum; - mr_unit_t *un; - u_longlong_t percent; - int cnt; - int ix; - uint64_t d; - - mdclrerror(&rip->mde); - - if ((un = raid_getun(mnum, &rip->mde, RD_LOCK, lock)) == NULL) - return (0); - - rip->ri_flags = 0; - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { - d = un->un_segsincolumn; - percent = d ? ((1000 * un->un_resync_line_index) / d) : 0; - if (percent > 1000) - percent = 1000; /* can't go over 100% */ - rip->ri_percent_done = (int)percent; - rip->ri_flags |= MD_RI_INPROGRESS; - } - - if (UNIT_STATE(un) & RUS_INIT) { - d = un->un_segsize * un->un_segsincolumn * - un->un_totalcolumncnt; - percent = - d ? ((1000 * (u_longlong_t)un->un_init_iocnt) / d) : 0; - if (percent > 1000) - percent = 1000; /* can't go over 100% */ - rip->ri_percent_done = (int)percent; - rip->ri_flags |= MD_GROW_INPROGRESS; - } else if (MD_STATUS(un) & MD_UN_GROW_PENDING) { - d = un->un_segsize * un->un_segsincolumn * un->un_init_colcnt; - percent = - d ? (((u_longlong_t)un->un_init_iocnt * 1000) / d) : 0; - if (percent > 1000) - percent = 1000; - rip->ri_percent_done = (int)percent; - rip->ri_flags |= MD_GROW_INPROGRESS; - } - - if (un->un_state & RUS_REGEN) - rip->ri_percent_done = un->un_percent_done; - - cnt = 0; - for (ix = 0; ix < un->un_totalcolumncnt; ix++) { - switch (un->un_column[ix].un_devstate) { - case RCS_INIT: - case RCS_ERRED: - case RCS_LAST_ERRED: - cnt++; - break; - default: - break; - } - } - d = un->un_totalcolumncnt; - rip->ri_percent_dirty = d ? (((u_longlong_t)cnt * 100) / d) : 0; - return (0); -} - -/* - * NAME: raid_grow - * DESCRIPTION: Concatenate to a RAID metadevice - * PARAMETERS: md_grow_params_t *mgp - * - pointer to IOCGROW data structure - * int mode - must be FWRITE - * IOLOCK *lockp - IOCTL read/write and unit_array_rw lock - * - * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun), - * obtains and releases md_unit_array_rw write lock - * - */ -static int -raid_grow(void *mgp, int mode, IOLOCK *lock) -{ - minor_t mnum; - mr_unit_t *un, *new_un; - mdi_unit_t *ui; - mddb_type_t typ1; - mddb_recid_t mr_recid; - mddb_recid_t old_vtoc = 0; - mddb_recid_t *recids; - md_create_rec_option_t options; - int err; - int col, i; - int64_t tb, atb; - u_longlong_t unrev; - int tc; - int rval = 0; - set_t setno; - mr_column_ic_t *mrc; - int num_recs, rid; - md_grow_params_t *mgph = mgp; - - - mnum = mgph->mnum; - - mdclrerror(&mgph->mde); - - ui = MDI_UNIT(mnum); - un = md_unit_readerlock(ui); - - if (MD_STATUS(un) & MD_UN_GROW_PENDING) { - md_unit_readerexit(ui); - return (mdmderror(&mgph->mde, MDE_IN_USE, mnum)); - } - - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { - md_unit_readerexit(ui); - return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - if (UNIT_STATE(un) & RUS_LAST_ERRED) { - md_unit_readerexit(ui); - return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum)); - } - - if (UNIT_STATE(un) & RUS_DOI) { - md_unit_readerexit(ui); - return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum)); - } - - if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) { - md_unit_readerexit(ui); - return (mdmderror(&mgph->mde, MDE_IN_USE, mnum)); - } - - md_unit_readerexit(ui); - - if ((un = raid_getun(mnum, &mgph->mde, WRITERS, lock)) == - NULL) - return (0); - - if (MD_STATUS(un) & MD_UN_GROW_PENDING) - return (mdmderror(&mgph->mde, MDE_IN_USE, mnum)); - - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) - return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum)); - - if (un->c.un_size >= mgph->size) - return (EINVAL); - - if (UNIT_STATE(un) & RUS_LAST_ERRED) - return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum)); - - if (UNIT_STATE(un) & RUS_DOI) - return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum)); - - if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) - return (mdmderror(&mgph->mde, MDE_IN_USE, mnum)); - - setno = MD_MIN2SET(mnum); - - typ1 = (mddb_type_t)md_getshared_key(setno, - raid_md_ops.md_driver.md_drivername); - - /* - * Preserve the friendly name nature of the device that is - * growing. - */ - options = MD_CRO_RAID; - if (un->c.un_revision & MD_FN_META_DEV) - options |= MD_CRO_FN; - if (mgph->options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdmderror(&mgph->mde, MDE_UNIT_TOO_LARGE, mnum)); -#else - mr_recid = mddb_createrec(mgph->size, typ1, 0, - MD_CRO_64BIT | options, setno); -#endif - } else { - mr_recid = mddb_createrec(mgph->size, typ1, 0, - MD_CRO_32BIT | options, setno); - } - if (mr_recid < 0) { - rval = mddbstatus2error(&mgph->mde, (int)mr_recid, - mnum, setno); - return (rval); - } - - /* get the address of the new unit */ - new_un = (mr_unit_t *)mddb_getrecaddr(mr_recid); - - /* - * It is okay that we muck with the new unit here, - * since no one else will know about the unit struct - * until we commit it. If we crash, the record will - * be automatically purged, since we haven't - * committed it yet and the old unit struct will be found. - */ - - /* copy in the user's unit struct */ - err = ddi_copyin((void *)(uintptr_t)mgph->mdp, new_un, - mgph->size, mode); - if (err) { - mddb_deleterec_wrapper(mr_recid); - return (EFAULT); - } - - /* make sure columns are being added */ - if (un->un_totalcolumncnt >= new_un->un_totalcolumncnt) { - mddb_deleterec_wrapper(mr_recid); - return (EINVAL); - } - - /* - * Save a few of the new unit structs fields. - * Before they get clobbered. - */ - tc = new_un->un_totalcolumncnt; - tb = new_un->c.un_total_blocks; - atb = new_un->c.un_actual_tb; - unrev = new_un->c.un_revision; - - /* - * Copy the old unit struct (static stuff) - * into new unit struct - */ - bcopy((caddr_t)un, (caddr_t)new_un, un->c.un_size); - - /* - * Restore a few of the new unit struct values. - */ - new_un->un_totalcolumncnt = tc; - new_un->c.un_actual_tb = atb; - new_un->un_grow_tb = tb; - new_un->c.un_revision = unrev; - new_un->c.un_record_id = mr_recid; - new_un->c.un_size = mgph->size; - - ASSERT(new_un->mr_ic == un->mr_ic); - - /* - * Save old column slots - */ - mrc = un->un_column_ic; - - /* - * Allocate new column slot - */ - new_un->un_column_ic = (mr_column_ic_t *) - kmem_zalloc(sizeof (mr_column_ic_t) * new_un->un_totalcolumncnt, - KM_SLEEP); - - /* - * Restore old column slots - * Free the old column slots - */ - bcopy(mrc, new_un->un_column_ic, - sizeof (mr_column_ic_t) * un->un_totalcolumncnt); - kmem_free(mrc, sizeof (mr_column_ic_t) * un->un_totalcolumncnt); - - /* All 64 bit metadevices only support EFI labels. */ - if (mgph->options & MD_CRO_64BIT) { - new_un->c.un_flag |= MD_EFILABEL; - /* - * If the device was previously smaller than a terabyte, - * and had a vtoc record attached to it, we remove the - * vtoc record, because the layout has changed completely. - */ - if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) && - (un->c.un_vtoc_id != 0)) { - old_vtoc = un->c.un_vtoc_id; - new_un->c.un_vtoc_id = - md_vtoc_to_efi_record(old_vtoc, setno); - } - } - - - /* - * allocate the real recids array. since we may have to commit - * underlying metadevice records, we need an array of size: - * total number of new components being attach + 2 (one for the - * raid itself, one for the end marker). - */ - num_recs = new_un->un_totalcolumncnt + 2; - rid = 0; - recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); - recids[rid++] = mr_recid; - - for (col = un->un_totalcolumncnt; - (col < new_un->un_totalcolumncnt); col++) { - mr_column_t *mr_col = &new_un->un_column[col]; - md_unit_t *comp_un; - - if (raid_build_pw_reservation(new_un, col) != 0) { - /* release pwslots already allocated by grow */ - for (i = un->un_totalcolumncnt; i < col; i++) { - raid_free_pw_reservation(new_un, i); - } - kmem_free(new_un->un_column_ic, - sizeof (mr_column_ic_t) * - new_un->un_totalcolumncnt); - kmem_free(new_un->mr_ic, sizeof (*un->mr_ic)); - kmem_free(recids, num_recs * sizeof (mddb_recid_t)); - mddb_deleterec_wrapper(mr_recid); - return (EINVAL); - } - /* - * set parent on metadevices being added. - * NOTE: currently soft partitions are the only metadevices - * which can appear within a RAID metadevice. - */ - if (md_getmajor(mr_col->un_dev) == md_major) { - comp_un = MD_UNIT(md_getminor(mr_col->un_dev)); - recids[rid++] = MD_RECID(comp_un); - md_set_parent(mr_col->un_dev, MD_SID(new_un)); - } - new_un->un_column[col].un_devflags = 0; - } - - /* set end marker */ - recids[rid] = 0; - - /* commit new unit struct */ - mddb_commitrecs_wrapper(recids); - - /* delete old unit struct */ - mddb_deleterec_wrapper(un->c.un_record_id); - - /* place new unit in in-core array */ - md_nblocks_set(mnum, new_un->c.un_total_blocks); - MD_UNIT(mnum) = new_un; - - /* - * If old_vtoc has a non zero value, we know: - * - This unit crossed the border from smaller to larger one TB - * - There was a vtoc record for the unit, - * - This vtoc record is no longer needed, because - * a new efi record has been created for this un. - */ - if (old_vtoc != 0) { - mddb_deleterec_wrapper(old_vtoc); - } - - /* free recids */ - kmem_free(recids, num_recs * sizeof (mddb_recid_t)); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE, - MD_UN2SET(new_un), MD_SID(new_un)); - MD_STATUS(new_un) |= MD_UN_GROW_PENDING; - - /* - * Since the md_ioctl_writelock aquires the unit write lock - * and open/close aquires the unit reader lock it is necessary - * to drop the unit write lock and then reaquire it as needed - * later. - */ - md_unit_writerexit(ui); - - if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) { - rval = mdmderror(&mgph->mde, MDE_RAID_OPEN_FAILURE, mnum); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, - MD_UN2SET(new_un), MD_SID(new_un)); - return (rval); - } - (void) md_unit_writerlock(ui); - for (i = 0; i < new_un->un_totalcolumncnt; i++) { - if (new_un->un_column[i].un_devstate & RCS_OKAY) - (void) init_pw_area(new_un, new_un->un_column[i].un_dev, - new_un->un_column[i].un_pwstart, i); - } - md_unit_writerexit(ui); - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - (void) md_unit_writerlock(ui); - /* create a background thread to initialize the columns */ - md_ioctl_droplocks(lock); - - return (raid_init_unit(mnum, &mgph->mde)); -} - -/* - * NAME: raid_reset - * DESCRIPTION: used to reset (clear / remove) a RAID metadevice - * PARAMETERS: md_i_reset_t *mirp - pointer to reset data structure - * - * LOCKS: obtains and releases md_unit_array_rw write lock - * - */ -static int -raid_reset(md_i_reset_t *mirp) -{ - minor_t mnum = mirp->mnum; - mr_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&mirp->mde); - - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - /* - * NOTE: need to get md_unit_writerlock to avoid conflict - * with raid_init thread. - */ - if ((un = raid_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == - NULL) { - rw_exit(&md_unit_array_rw.lock); - return (0); - } - ui = MDI_UNIT(mnum); - - if (MD_HAS_PARENT(MD_PARENT(un))) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); - } - - un = (mr_unit_t *)md_unit_openclose_enter(ui); - if (md_unit_isopen(MDI_UNIT(mnum))) { - md_unit_openclose_exit(ui); - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); - } - md_unit_openclose_exit(ui); - if (UNIT_STATE(un) != RUS_OKAY && !mirp->force) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_RAID_NEED_FORCE, mnum)); - } - - reset_raid(un, mnum, 1); - - /* - * Update unit availability - */ - md_set[setno].s_un_avail++; - - /* - * If MN set, reset s_un_next so all nodes can have - * the same view of the next available slot when - * nodes are -w and -j - */ - if (MD_MNSET_SETNO(setno)) { - (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); - } - - rw_exit(&md_unit_array_rw.lock); - - return (0); -} - -/* - * NAME: raid_get_geom - * DESCRIPTION: used to get the geometry of a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to get the geometry for - * struct dk_geom *gp - pointer to geometry data structure - * - * LOCKS: none - * - */ -static int -raid_get_geom( - mr_unit_t *un, - struct dk_geom *geomp -) -{ - md_get_geom((md_unit_t *)un, geomp); - - return (0); -} - -/* - * NAME: raid_get_vtoc - * DESCRIPTION: used to get the VTOC on a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from - * struct vtoc *vtocp - pointer to VTOC data structure - * - * LOCKS: none - * - */ -static int -raid_get_vtoc( - mr_unit_t *un, - struct vtoc *vtocp -) -{ - md_get_vtoc((md_unit_t *)un, vtocp); - - return (0); -} - -/* - * NAME: raid_set_vtoc - * DESCRIPTION: used to set the VTOC on a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on - * struct vtoc *vtocp - pointer to VTOC data structure - * - * LOCKS: none - * - */ -static int -raid_set_vtoc( - mr_unit_t *un, - struct vtoc *vtocp -) -{ - return (md_set_vtoc((md_unit_t *)un, vtocp)); -} - - -/* - * NAME: raid_get_extvtoc - * DESCRIPTION: used to get the extended VTOC on a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from - * struct extvtoc *vtocp - pointer to extended VTOC data structure - * - * LOCKS: none - * - */ -static int -raid_get_extvtoc( - mr_unit_t *un, - struct extvtoc *vtocp -) -{ - md_get_extvtoc((md_unit_t *)un, vtocp); - - return (0); -} - -/* - * NAME: raid_set_extvtoc - * DESCRIPTION: used to set the extended VTOC on a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on - * struct extvtoc *vtocp - pointer to extended VTOC data structure - * - * LOCKS: none - * - */ -static int -raid_set_extvtoc( - mr_unit_t *un, - struct extvtoc *vtocp -) -{ - return (md_set_extvtoc((md_unit_t *)un, vtocp)); -} - - - -/* - * NAME: raid_get_cgapart - * DESCRIPTION: used to get the dk_map on a RAID metadevice - * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on - * struct vtoc *dkmapp - pointer to dk_map data structure - * - * LOCKS: none - * - */ - -static int -raid_get_cgapart( - mr_unit_t *un, - struct dk_map *dkmapp -) -{ - md_get_cgapart((md_unit_t *)un, dkmapp); - return (0); -} - -/* - * NAME: raid_getdevs - * DESCRIPTION: return all devices within a RAID metadevice - * PARAMETERS: md_getdevs_params_t *mgdp - * - pointer to getdevs IOCTL data structure - * int mode - should be FREAD - * IOLOCK *lockp - IOCTL read/write lock - * - * LOCKS: obtains unit reader lock via IOLOCK - * - */ -static int -raid_getdevs( - void *mgdp, - int mode, - IOLOCK *lock -) -{ - minor_t mnum; - mr_unit_t *un; - md_dev64_t *udevs; - int i, cnt; - md_dev64_t unit_dev; - md_getdevs_params_t *mgdph = mgdp; - - - mnum = mgdph->mnum; - - /* check out unit */ - mdclrerror(&mgdph->mde); - - if ((un = raid_getun(mnum, &mgdph->mde, RD_LOCK, lock)) == NULL) - return (0); - - udevs = (md_dev64_t *)(uintptr_t)mgdph->devs; - - for (cnt = 0, i = 0; i < un->un_totalcolumncnt; i++, cnt++) { - if (cnt < mgdph->cnt) { - unit_dev = un->un_column[i].un_orig_dev; - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ - (unit_dev)) == NODEV64) - return (ENODEV); - } - - if (ddi_copyout((caddr_t)&unit_dev, - (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0) - return (EFAULT); - } - if (HOTSPARED(un, i)) { - cnt++; - if (cnt >= mgdph->cnt) - continue; - - unit_dev = un->un_column[i].un_dev; - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ - (unit_dev)) == NODEV64) - return (ENODEV); - } - - if (ddi_copyout((caddr_t)&unit_dev, - (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0) - return (EFAULT); - } - } - mgdph->cnt = cnt; - return (0); -} - -/* - * NAME: raid_change - * DESCRIPTION: used to change the following dynamic values: - * the hot spare pool - * in the unit structure of a RAID metadevice - * PARAMETERS: md_change_params_t *mcp - pointer to change data structure - * IOLOCK *lock - pointer to IOCTL lock - * - * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun) - * - */ -static int -raid_change( - md_raid_params_t *mrp, - IOLOCK *lock -) -{ - minor_t mnum = mrp->mnum; - mr_unit_t *un; - int ix; - mddb_recid_t recids[3] = {0, 0, 0}; - int err; - int irecid; - int inc_new_hsp = 0; - - mdclrerror(&mrp->mde); - - if ((un = raid_getun(mnum, &mrp->mde, WR_LOCK, lock)) == NULL) - return (0); - - if (!mrp->params.change_hsp_id) - return (0); - - /* verify that no hotspare is in use */ - for (ix = 0; ix < un->un_totalcolumncnt; ix++) { - if (HOTSPARED(un, ix)) { - return (mdmderror(&mrp->mde, MDE_HS_IN_USE, mnum)); - } - } - - /* replace the hot spare pool */ - - irecid = 0; - if (mrp->params.hsp_id != -1) { - /* increment the reference count of the new hsp */ - err = md_hot_spare_ifc(HSP_INCREF, mrp->params.hsp_id, 0, 0, - &recids[0], NULL, NULL, NULL); - if (err) { - return (mdhsperror(&mrp->mde, MDE_INVAL_HSP, - mrp->params.hsp_id)); - } - inc_new_hsp = 1; - irecid++; - } - - if (un->un_hsp_id != -1) { - /* decrement the reference count of the old hsp */ - err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, - &recids[irecid], NULL, NULL, NULL); - if (err) { - err = mdhsperror(&mrp->mde, MDE_INVAL_HSP, - mrp->params.hsp_id); - if (inc_new_hsp) { - (void) md_hot_spare_ifc(HSP_DECREF, - mrp->params.hsp_id, 0, 0, - &recids[0], NULL, NULL, NULL); - /* - * Don't need to commit the record, - * because it wasn't committed before - */ - } - return (err); - } - } - - un->un_hsp_id = mrp->params.hsp_id; - - raid_commit(un, recids); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - /* Now trigger hot spare processing in case one is needed. */ - if ((un->un_hsp_id != -1) && (un->un_state == RUS_ERRED)) - (void) raid_hotspares(); - - return (0); -} - -/* - * NAME: raid_admin_ioctl - * DESCRIPTION: IOCTL operations unique to metadevices and RAID - * PARAMETERS: int cmd - IOCTL command to be executed - * void *data - pointer to IOCTL data structure - * int mode - either FREAD or FWRITE - * IOLOCK *lockp - IOCTL read/write lock - * - * LOCKS: none - * - */ -static int -raid_admin_ioctl( - int cmd, - void *data, - int mode, - IOLOCK *lockp -) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - - - /* dispatch ioctl */ - switch (cmd) { - - case MD_IOCSET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_set(d, mode); - break; - } - - case MD_IOCGET: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_get(d, mode, lockp); - break; - } - - case MD_IOCREPLACE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (replace_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_replace((replace_params_t *)d, lockp); - break; - } - - case MD_IOCSETSYNC: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_resync_ioctl_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_set_sync((md_resync_ioctl_t *)d, lockp); - break; - } - - case MD_IOCGETSYNC: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_resync_ioctl_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - err = raid_get_resync((md_resync_ioctl_t *)d, lockp); - - break; - } - - case MD_IOCGROW: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_grow_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_grow(d, mode, lockp); - break; - } - - case MD_IOCCHANGE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_raid_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_change((md_raid_params_t *)d, lockp); - break; - } - - case MD_IOCRESET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_reset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_reset((md_i_reset_t *)d); - break; - } - - case MD_IOCGET_DEVS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_getdevs_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_getdevs(d, mode, lockp); - break; - } - - case MD_IOCSETREGEN: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_regen_param_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = raid_regen((md_regen_param_t *)d, lockp); - break; - } - - case MD_IOCPROBE_DEV: - { - md_probedev_impl_t *p = NULL; - md_probedev_t *ph = NULL; - daemon_queue_t *hdr = NULL; - int i; - size_t sz1 = 0; - - - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_probedev_t); - - d = kmem_alloc(sz, KM_SLEEP); - - /* now copy in the data */ - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - goto free_mem; - } - - /* - * Sanity test the args. Test name should have the keyword - * probe. - */ - p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP); - p->probe_sema = NULL; - p->probe_mx = NULL; - p->probe.mnum_list = (uint64_t)NULL; - - ph = (md_probedev_t *)d; - p->probe.nmdevs = ph->nmdevs; - (void) strcpy(p->probe.test_name, ph->test_name); - bcopy(&ph->md_driver, &(p->probe.md_driver), - sizeof (md_driver_t)); - - if ((p->probe.nmdevs < 1) || - (strstr(p->probe.test_name, "probe") == NULL)) { - err = EINVAL; - goto free_mem; - } - - sz1 = sizeof (minor_t) * p->probe.nmdevs; - - p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz1, - KM_SLEEP); - - if (ddi_copyin((caddr_t)(uintptr_t)ph->mnum_list, - (caddr_t)(uintptr_t)p->probe.mnum_list, sz1, mode)) { - err = EFAULT; - goto free_mem; - } - - if (err = md_init_probereq(p, &hdr)) - goto free_mem; - - /* - * put the request on the queue and wait. - */ - - daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW); - - (void) IOLOCK_RETURN(0, lockp); - /* wait for the events to occur */ - for (i = 0; i < p->probe.nmdevs; i++) { - sema_p(PROBE_SEMA(p)); - } - while (md_ioctl_lock_enter() == EINTR) - ; - - /* - * clean up. The hdr list is freed in the probe routines - * since the list is NULL by the time we get here. - */ -free_mem: - if (p) { - if (p->probe_sema != NULL) { - sema_destroy(PROBE_SEMA(p)); - kmem_free(p->probe_sema, sizeof (ksema_t)); - } - if (p->probe_mx != NULL) { - mutex_destroy(PROBE_MX(p)); - kmem_free(p->probe_mx, sizeof (kmutex_t)); - } - if (p->probe.mnum_list) - kmem_free((caddr_t)(uintptr_t) - p->probe.mnum_list, sz1); - - kmem_free(p, sizeof (md_probedev_impl_t)); - } - break; - } - - default: - return (ENOTTY); - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - kmem_free(d, sz); - } - return (err); -} - -/* - * NAME: md_raid_ioctl - * DESCRIPTION: RAID metadevice IOCTL operations entry point. - * PARAMETERS: md_dev64_t dev - RAID device identifier - * int cmd - IOCTL command to be executed - * void *data - pointer to IOCTL data structure - * int mode - either FREAD or FWRITE - * IOLOCK *lockp - IOCTL read/write lock - * - * LOCKS: none - * - */ -int -md_raid_ioctl( - dev_t dev, - int cmd, - void *data, - int mode, - IOLOCK *lockp -) -{ - minor_t mnum = getminor(dev); - mr_unit_t *un; - int err = 0; - - /* handle admin ioctls */ - if (mnum == MD_ADM_MINOR) - return (raid_admin_ioctl(cmd, data, mode, lockp)); - - /* check unit */ - if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((un = MD_UNIT(mnum)) == NULL)) - return (ENXIO); - - /* is this a supported ioctl? */ - err = md_check_ioctl_against_unit(cmd, un->c); - if (err != 0) { - return (err); - } - - /* dispatch ioctl */ - switch (cmd) { - - case DKIOCINFO: - { - struct dk_cinfo *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - get_info(p, mnum); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) - err = EFAULT; - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGMEDIAINFO: - { - struct dk_minfo p; - - if (! (mode & FREAD)) - return (EACCES); - - get_minfo(&p, mnum); - if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) - err = EFAULT; - - return (err); - } - - case DKIOCGGEOM: - { - struct dk_geom *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - if ((err = raid_get_geom(un, p)) == 0) { - if (ddi_copyout((caddr_t)p, data, sizeof (*p), - mode) != 0) - err = EFAULT; - } - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FREAD)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((err = raid_get_vtoc(un, vtoc)) != 0) { - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - vtoctovtoc32((*vtoc), (*vtoc32)); - if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) - err = EFAULT; - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCSVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { - err = EFAULT; - } - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { - err = EFAULT; - } else { - vtoc32tovtoc((*vtoc32), (*vtoc)); - } - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - if (err == 0) - err = raid_set_vtoc(un, vtoc); - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCGEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FREAD)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if ((err = raid_get_extvtoc(un, extvtoc)) != 0) { - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) - err = EFAULT; - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCSEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { - err = EFAULT; - } - - if (err == 0) - err = raid_set_extvtoc(un, extvtoc); - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCGAPART: - { - struct dk_map dmp; - - if ((err = raid_get_cgapart(un, &dmp)) != 0) { - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), - mode) != 0) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct dk_map32 dmp32; - - dmp32.dkl_cylno = dmp.dkl_cylno; - dmp32.dkl_nblk = dmp.dkl_nblk; - - if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), - mode) != 0) - err = EFAULT; - } -#endif /* _SYSCALL32 */ - - return (err); - } - case DKIOCGETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocgetefi(mnum, data, mode)); - } - - case DKIOCSETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocsetefi(mnum, data, mode)); - } - - case DKIOCPARTITION: - { - return (md_dkiocpartition(mnum, data, mode)); - } - - default: - return (ENOTTY); - } -} - -/* - * rename/exchange named service entry points and support functions follow. - * Most functions are handled generically, except for raid-specific locking - * and checking - */ - -/* - * NAME: raid_may_renexch_self - * DESCRIPTION: support routine for rename check ("MDRNM_CHECK") named service - * PARAMETERS: mr_unit_t *un - unit struct of raid unit to be renamed - * mdi_unit_t *ui - in-core unit struct of same raid unit - * md_rentxn_t *rtxnp - rename transaction state - * - * LOCKS: none - * - */ -static int -raid_may_renexch_self( - mr_unit_t *un, - mdi_unit_t *ui, - md_rentxn_t *rtxnp) -{ - minor_t from_min; - minor_t to_min; - bool_t toplevel; - bool_t related; - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - if (!un || !ui) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD)); - if (MD_CAPAB(un) & MD_CAN_META_CHILD) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - if (MD_PARENT(un) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - toplevel = !MD_HAS_PARENT(MD_PARENT(un)); - - /* we're related if trying to swap with our parent */ - related = (!toplevel) && (MD_PARENT(un) == to_min); - - switch (rtxnp->op) { - case MDRNOP_EXCHANGE: - - if (!related) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_TARGET_UNRELATED, to_min); - return (EINVAL); - } - - break; - - case MDRNOP_RENAME: - /* - * if from is top-level and is open, then the kernel is using - * the md_dev64_t. - */ - - if (toplevel && md_unit_isopen(ui)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - from_min); - return (EBUSY); - } - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - return (0); /* ok */ -} - -/* - * NAME: raid_rename_check - * DESCRIPTION: ("MDRNM_CHECK") rename/exchange named service entry point - * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this - * raid device for rename transaction - * md_rentxn_t *rtxnp - rename transaction state - * - * LOCKS: none - * - */ -intptr_t -raid_rename_check( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - int err = 0; - int column; - mr_unit_t *un; - - ASSERT(delta); - ASSERT(rtxnp); - ASSERT(delta->unp); - ASSERT(delta->uip); - - if (!delta || !rtxnp || !delta->unp || !delta->uip) { - (void) mdsyserror(&rtxnp->mde, EINVAL); - return (EINVAL); - } - - un = (mr_unit_t *)delta->unp; - - for (column = 0; column < un->un_totalcolumncnt; column++) { - rcs_state_t colstate; - - colstate = un->un_column[column].un_devstate; - - if (colstate & RCS_LAST_ERRED) { - (void) mdmderror(&rtxnp->mde, MDE_RAID_LAST_ERRED, - md_getminor(delta->dev)); - return (EINVAL); - } - - if (colstate & RCS_INIT_ERRED) { - (void) mdmderror(&rtxnp->mde, MDE_RAID_DOI, - md_getminor(delta->dev)); - return (EINVAL); - } - - /* How did we get this far before detecting this? */ - if (colstate & RCS_RESYNC) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - md_getminor(delta->dev)); - return (EBUSY); - } - - if (colstate & RCS_ERRED) { - (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY, - md_getminor(delta->dev)); - return (EINVAL); - } - - if (!(colstate & RCS_OKAY)) { - (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY, - md_getminor(delta->dev)); - return (EINVAL); - } - - if (HOTSPARED(un, column)) { - (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY, - md_getminor(delta->dev)); - return (EINVAL); - } - } - - /* self does additional checks */ - if (delta->old_role == MDRR_SELF) { - err = raid_may_renexch_self((mr_unit_t *)delta->unp, - delta->uip, rtxnp); - } - return (err); -} - -/* - * NAME: raid_rename_lock - * DESCRIPTION: ("MDRNM_LOCK") rename/exchange named service entry point - * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this - * raid device for rename transaction - * md_rentxn_t *rtxnp - rename transaction state - * - * LOCKS: io and unit locks (taken explicitly *not* via ioctl wrappers) - * - */ -intptr_t -raid_rename_lock( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - minor_t mnum; - - ASSERT(delta); - ASSERT(rtxnp); - - mnum = md_getminor(delta->dev); - if (mnum == rtxnp->to.mnum && rtxnp->op == MDRNOP_RENAME) { - return (0); - } - - ASSERT(delta->uip); - if (!delta->uip) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum); - return (ENODEV); - } - - ASSERT(delta->unp); - if (!delta->unp) { - - return (ENODEV); - } - - ASSERT(!IO_WRITER_HELD(delta->unp)); - (void) md_io_writerlock(delta->uip); - ASSERT(IO_WRITER_HELD(delta->unp)); - - - ASSERT(!UNIT_WRITER_HELD(delta->unp)); - (void) md_unit_writerlock(delta->uip); - ASSERT(UNIT_WRITER_HELD(delta->unp)); - - return (0); -} - -/* - * NAME: raid_rename_unlock - * DESCRIPTION: ("MDRNM_UNLOCK") rename/exchange named service entry point - * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this - * raid device for rename transaction - * md_rentxn_t *rtxnp - rename transaction state - * - * LOCKS: drops io and unit locks - * - */ -/* ARGSUSED */ -void -raid_rename_unlock( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - mr_unit_t *un = (mr_unit_t *)delta->unp; - minor_t mnum = MD_SID(un); - int col; - - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - - ASSERT(UNIT_WRITER_HELD(delta->unp)); - md_unit_writerexit(delta->uip); - ASSERT(!UNIT_WRITER_HELD(delta->unp)); - - if (! (delta->txn_stat.role_swapped) || ! (delta->txn_stat.is_open)) { - goto out; - } - if (raid_internal_open(mnum, (FREAD | FWRITE), - OTYP_LYR, MD_OFLG_ISINIT) == 0) { - for (col = 0; col < un->un_totalcolumncnt; col++) { - if (un->un_column[col].un_devstate & RCS_OKAY) - (void) init_pw_area(un, - un->un_column[col].un_dev, - un->un_column[col].un_pwstart, col); - } - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - } - -out: - ASSERT(IO_WRITER_HELD(delta->unp)); - md_io_writerexit(delta->uip); - ASSERT(!IO_WRITER_HELD(delta->unp)); -} -/* end of rename/exchange named service and support functions */ diff --git a/usr/src/uts/common/io/lvm/raid/raid_replay.c b/usr/src/uts/common/io/lvm/raid/raid_replay.c deleted file mode 100644 index fc12886c66da..000000000000 --- a/usr/src/uts/common/io/lvm/raid/raid_replay.c +++ /dev/null @@ -1,842 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * NAME: raid_replay.c - * - * DESCRIPTION: RAID driver source file containing routines related to replay - * operation. - * - * ROUTINES PROVIDED FOR EXTERNAL USE: - * raid_replay() - replay all the pre write entries in the unit. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -/* functions forward declarations */ -static int raid_replay_error(mr_unit_t *un, int column); - -int raid_total_rply_entries = 0; - -/* - * NAMES: raid_rply_dealloc, raid_rply_alloc - * DESCRIPTION: RAID metadevice replay buffer allocation/deallocation routines - * PARAMETERS: mr_unit_t *un - pointer to the unit structure - * mr_unit_t *un - pointer to the unit structure - * RETURNS: - */ -static void -raid_rply_dealloc(mr_unit_t *un, - raid_rplybuf_t **bufs, - raid_rplybuf_t *rwbuf1, - raid_rplybuf_t *rwbuf2) -{ - int i; - raid_rplybuf_t *tmp; - - for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) { - if (tmp->rpl_data) { - kmem_free(tmp->rpl_data, DEV_BSIZE); - tmp->rpl_data = NULL; - } - if (tmp->rpl_buf) { - kmem_free(tmp->rpl_buf, sizeof (buf_t)); - tmp->rpl_buf = NULL; - } - } - kmem_free(*bufs, sizeof (raid_rplybuf_t) * un->un_totalcolumncnt); - *bufs = NULL; - if (rwbuf1->rpl_data) { - kmem_free(rwbuf1->rpl_data, dbtob(un->un_iosize)); - rwbuf1->rpl_data = NULL; - } - if (rwbuf1->rpl_buf) { - kmem_free((caddr_t)rwbuf1->rpl_buf, sizeof (buf_t)); - rwbuf1->rpl_buf = NULL; - } - if (rwbuf2->rpl_data) { - kmem_free(rwbuf2->rpl_data, dbtob(un->un_iosize)); - rwbuf2->rpl_data = NULL; - } - if (rwbuf2->rpl_buf) { - kmem_free((caddr_t)rwbuf2->rpl_buf, sizeof (buf_t)); - rwbuf2->rpl_buf = NULL; - } -} - -static void -raid_rply_alloc(mr_unit_t *un, - raid_rplybuf_t **bufs, - raid_rplybuf_t *rwbuf1, - raid_rplybuf_t *rwbuf2) -{ - int i; - raid_rplybuf_t *tmp; - buf_t *bp; - - /* intialization */ - *bufs = kmem_zalloc(sizeof (raid_rplybuf_t) * un->un_totalcolumncnt, - KM_SLEEP); - ASSERT(*bufs != NULL); - bzero((caddr_t)rwbuf1, sizeof (raid_rplybuf_t)); - bzero((caddr_t)rwbuf2, sizeof (raid_rplybuf_t)); - - /* allocate all the buffers required for the replay processing */ - for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) { - tmp->rpl_data = kmem_zalloc(DEV_BSIZE, KM_SLEEP); - ASSERT(tmp->rpl_data != NULL); - tmp->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); - ASSERT(tmp->rpl_buf != NULL); - bp = (buf_t *)tmp->rpl_buf; - bp->b_back = bp; - bp->b_forw = bp; - bp->b_flags = B_BUSY; - bp->b_offset = -1; - /* Initialize semaphores */ - sema_init(&bp->b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bp->b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - } - - rwbuf1->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP); - ASSERT(rwbuf1->rpl_data != NULL); - rwbuf1->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); - ASSERT(rwbuf1->rpl_buf != NULL); - rwbuf2->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP); - ASSERT(rwbuf2->rpl_data != NULL); - rwbuf2->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); - ASSERT(rwbuf2->rpl_buf != NULL); - - bp = (buf_t *)rwbuf1->rpl_buf; - bp->b_back = bp; - bp->b_forw = bp; - bp->b_flags = B_BUSY; - bp->b_offset = -1; - /* Initialize semaphores */ - sema_init(&bp->b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bp->b_sem, 0, NULL, - SEMA_DEFAULT, NULL); - bp = (buf_t *)rwbuf2->rpl_buf; - bp->b_back = bp; - bp->b_forw = bp; - bp->b_flags = B_BUSY; - bp->b_offset = -1; - /* Initialize semaphores */ - sema_init(&bp->b_io, 0, NULL, - SEMA_DEFAULT, NULL); - sema_init(&bp->b_sem, 0, NULL, - SEMA_DEFAULT, NULL); -} - -/* - * NAMES: rpl_insert, rpl_delete, rpl_find - * DESCRIPTION: RAID metadevice replay list processing APIs - * PARAMETERS: raid_rplylst_t *list - pointer to the replay list. - * raid_pwhdr_t *pwptr - pointer to a pre-write header. - * RETURNS: - */ -static void -rpl_insert(raid_rplylst_t **listp, raid_rplylst_t *newp) -{ - raid_rplylst_t *tmp, **prevp; - - for (prevp = listp; ((tmp = *prevp) != NULL); prevp = &tmp->rpl_next) { - if (tmp->rpl_id > newp->rpl_id) { - break; - } - } - newp->rpl_next = tmp; - *prevp = newp; -} - -static void -rpl_delete(raid_rplylst_t **prevp, raid_rplylst_t *oldp) -{ - - ASSERT((caddr_t)oldp); - raid_total_rply_entries --; - *prevp = oldp->rpl_next; - kmem_free((caddr_t)oldp, sizeof (raid_rplylst_t)); -} - -static raid_rplylst_t * -rpl_find(raid_rplylst_t *list, long long pw_id) -{ - raid_rplylst_t *tmp; - - for (tmp = list; tmp; tmp = tmp->rpl_next) { - if (pw_id == tmp->rpl_id) { - return (tmp); - } - } - return ((raid_rplylst_t *)NULL); -} - -/* - * NAMES: enq_rplylst - * DESCRIPTION: Enqueue a pre-write header into the replay list. - * PARAMETERS: raid_rplylst_t *list - pointer to the replay list. - * raid_pwhdr_t *pwptr - pointer to a pre-write header. - * RETURNS: - */ -static void -enq_rplylst(raid_rplylst_t **listp, raid_pwhdr_t *pwhp, - uint_t slot, int column) -{ - raid_rplylst_t *newp, *oldp; - - /* check if the pre-write existed in the list */ - if ((pwhp->rpw_colcount <= 2) && - (oldp = rpl_find(*listp, pwhp->rpw_id))) { - bcopy((caddr_t)pwhp, (caddr_t)&oldp->rpl_pwhdr2, - sizeof (raid_pwhdr_t)); - oldp->rpl_slot2 = slot; - oldp->rpl_column2 = column; - } else { - raid_total_rply_entries ++; - newp = (raid_rplylst_t *)kmem_zalloc(sizeof (raid_rplylst_t), - KM_SLEEP); - ASSERT(newp != NULL); - bcopy((caddr_t)pwhp, (caddr_t)&newp->rpl_pwhdr1, - sizeof (raid_pwhdr_t)); - bzero((caddr_t)&newp->rpl_pwhdr2, sizeof (raid_pwhdr_t)); - - newp->rpl_id = pwhp->rpw_id; - newp->rpl_column1 = column; - newp->rpl_slot1 = slot; - newp->rpl_next = (raid_rplylst_t *)NULL; - newp->rpl_colcnt = pwhp->rpw_colcount; - rpl_insert(listp, newp); - } -} - -/* - * NAMES: pw_read_done and pw_write_done - * DESCRIPTION: don't know the usage yet ??? (TBD) - * PARAMETERS: - * RETURNS: - */ -static int -pw_read_done(buf_t *bp) -{ - ASSERT(SEMA_HELD(&bp->b_sem)); - ASSERT((bp->b_flags & B_DONE) == 0); - - bp->b_flags |= B_DONE; - - if (bp->b_flags & B_ASYNC) - sema_v(&bp->b_sem); - else - /* wakeup the thread waiting on this buf */ - sema_v(&bp->b_io); - return (0); -} - -static int -pw_write_done(buf_t *bp) -{ - ASSERT(SEMA_HELD(&bp->b_sem)); - ASSERT((bp->b_flags & B_DONE) == 0); - - bp->b_flags |= B_DONE; - - if (bp->b_flags & B_ASYNC) - sema_v(&bp->b_sem); - else - /* wakeup the thread waiting on this buf */ - sema_v(&bp->b_io); - - return (0); -} - -/* - * NAMES: raid_pwhdr_read - * DESCRIPTION: issue a syncronous read to read a pre-write header - * PARAMETERS: mr_unit_t *un - pointer to the unit structure - * int pw_slot - pre-write entry slot number - * int column - column number for the pre-write entry - * raid_rplybuf_t *bufp - pointer to the replay buffer structure - * RETURNS: - */ -static void -raid_pwhdr_read(mr_unit_t *un, int pw_slot, int column, raid_rplybuf_t *bufp) -{ - buf_t *bp; - - /* set up pointers from raid_rplybuf_t *bufp */ - bp = (buf_t *)bufp->rpl_buf; - - /* calculate the data address or block number */ - bp->b_un.b_addr = bufp->rpl_data; - bp->b_lblkno = un->un_column[column].un_pwstart + - pw_slot * un->un_iosize; - bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); - bp->b_bufsize = DEV_BSIZE; - bp->b_bcount = DEV_BSIZE; - bp->b_flags = (B_READ | B_BUSY); - bp->b_iodone = pw_read_done; - (void) md_call_strategy(bp, 0, NULL); -} - -/* - * NAMES: raid_pw_read - * DESCRIPTION: issue a syncronous read to read a pre-write entry - * PARAMETERS: mr_unit_t *un - pointer to the unit structure - * int column - column number for the pre-write entry - * u_int slot - pre-write entry slot number - * raid_rplybuf_t *bufp - pointer to the replay buffer structure - * RETURNS: - */ -static int -raid_pw_read(mr_unit_t *un, int column, uint_t slot, raid_rplybuf_t *bufp) -{ - buf_t *bp; - int error; - uint_t blkcnt = un->un_iosize; - uint_t bytecnt = blkcnt * DEV_BSIZE; - - /* if this column is no longer accessible, return */ - if (!COLUMN_ISUP(un, column)) - return (RAID_RPLY_COMPREPLAY); - - /* set up pointers from raid_rplybuf_t *bufp */ - bp = (buf_t *)bufp->rpl_buf; - - /* calculate the data address or block number */ - bp->b_un.b_addr = bufp->rpl_data; - bp->b_bufsize = bytecnt; - bp->b_bcount = bytecnt; - bp->b_flags = (B_READ | B_BUSY); - bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); - bp->b_lblkno = un->un_column[column].un_pwstart + (slot * blkcnt); - bp->b_iodone = pw_read_done; - (void) md_call_strategy(bp, 0, NULL); - if (biowait(bp)) { - error = raid_replay_error(un, column); - return (error); - } - return (0); -} - -/* - * NAMES: raid_pw_write - * DESCRIPTION: issue a syncronous write to write a pre-write entry - * PARAMETERS: mr_unit_t *un - pointer to the unit structure - * int column - column number for the pre-write entry - * raid_pwhdr_t *pwhp - needed for some infos about the pw header - * raid_rplybuf_t *bufp - pointer to the replay buffer structure - * RETURNS: - */ -static int -raid_pw_write(mr_unit_t *un, int column, raid_pwhdr_t *pwhp, - raid_rplybuf_t *bufp) -{ - buf_t *bp; - int error; - - /* if this column is no longer accessible, return */ - if (!COLUMN_ISUP(un, column)) - return (RAID_RPLY_COMPREPLAY); - - /* set up pointers from raid_rplybuf_t *bufp */ - bp = (buf_t *)bufp->rpl_buf; - - /* calculate the data address or block number */ - bp->b_un.b_addr = bufp->rpl_data + DEV_BSIZE; - bp->b_bufsize = dbtob(pwhp->rpw_blkcnt); - bp->b_bcount = dbtob(pwhp->rpw_blkcnt); - bp->b_flags = (B_WRITE | B_BUSY); - bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); - bp->b_lblkno = un->un_column[column].un_devstart + pwhp->rpw_blkno; - bp->b_iodone = pw_write_done; - (void) md_call_strategy(bp, 0, NULL); - if (biowait(bp)) { - error = raid_replay_error(un, column); - return (error); - } - return (0); -} - -/* - * NAMES: genchecksum - * DESCRIPTION: generate check sum for a pre-write entry - * PARAMETERS: caddr_t addr - where the data bytes are - * int bcount - number of bytes in the pre-write entry - * RETURNS: - */ -static uint_t -genchecksum(caddr_t addr, size_t bcount) -{ - uint_t *dbuf; - size_t wordcnt; - uint_t dsum = 0; - - wordcnt = bcount / sizeof (uint_t); - dbuf = (uint_t *)(void *)(addr); - - while (wordcnt--) { - dsum ^= *dbuf; - dbuf++; - } - return (dsum); -} - -/* - * NAMES: raid_rply_verify - * DESCRIPTION: verify the pre-write entry for replay - * PARAMETERS: mr_unit_t *un - pointer to unit structure - * int col1 - column number 1 - * int goodsum1 - flag to indicate good checksum - * int *do_1 - flag to indicate whether we should replay - * the first pre-write - * int col2 - column number 2 - * int goodsum2 - flag to indicate good checksum - * int *do_2 - flag to indicate whether we should replay - * the first pre-write - * RETURNS: - */ -static void -raid_rply_verify(mr_unit_t *un, int col1, int goodsum1, int *do_1, - int col2, int goodsum2, int *do_2) -{ - int good_state1 = 0; - int good_state2 = 0; - - *do_1 = 0; *do_2 = 0; /* prepare for the worst */ - if (COLUMN_ISUP(un, col1)) { - good_state1 = 1; - } - if (COLUMN_ISUP(un, col2)) { - good_state2 = 1; - } - if ((good_state1 & good_state2) && (goodsum1 & goodsum2)) { - /* if both columns check out, do it */ - *do_1 = 1; *do_2 = 1; - } else if ((good_state1 & goodsum1) && !good_state2) { - /* if one column is okay and the other is errored, do it */ - *do_1 = 1; *do_2 = 0; - } else if ((good_state2 & goodsum2) && !good_state1) { - /* if one column is okay and the other is errored, do it */ - *do_2 = 1; *do_1 = 0; - } -} - -/* - * NAMES: raid_rplyeach - * DESCRIPTION: issue a syncronous read to read a pre-write header - * PARAMETERS: mr_unit_t *un - pointer to the unit structure - * raid_rplylst_t *eachp - pointer to the replay list entry - * raid_rplybuf_t *rwbuf1 - pointer to the replay buffer structure - * raid_rplybuf_t *rwbuf2 - pointer to the replay buffer structure - * RETURNS: - */ -static int -raid_rplyeach( - mr_unit_t *un, - raid_rplylst_t *eachp, - raid_rplybuf_t *rwbuf1, - raid_rplybuf_t *rwbuf2 -) -{ - raid_pwhdr_t *pwhp1; - raid_pwhdr_t *pwhp2; - uint_t dsum1 = 0; - uint_t dsum2 = 0; - int good_pw1 = 0; - int good_pw2 = 0; - int do_1 = 0; - int do_2 = 0; - int error = 0; - - /* First verify the normal case - two pre-write entries are all good */ - if ((eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC && - eachp->rpl_pwhdr2.rpw_magic == RAID_PWMAGIC) && - (eachp->rpl_pwhdr1.rpw_blkcnt == eachp->rpl_pwhdr2.rpw_blkcnt)) { - - ASSERT(eachp->rpl_pwhdr1.rpw_id == eachp->rpl_pwhdr2.rpw_id); - - /* read the pre-write entries */ - error = raid_pw_read(un, eachp->rpl_column1, - eachp->rpl_slot1, rwbuf1); - pwhp1 = &eachp->rpl_pwhdr1; - if (error) { - if (error != RAID_RPLY_COMPREPLAY) - return (error); - good_pw1 = FALSE; - } else { - /* generate checksum for each pre-write entry */ - dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE, - dbtob(pwhp1->rpw_blkcnt)); - good_pw1 = (dsum1 == pwhp1->rpw_sum); - } - - error = raid_pw_read(un, eachp->rpl_column2, eachp->rpl_slot2, - rwbuf2); - pwhp2 = &eachp->rpl_pwhdr2; - if (error) { - if (error != RAID_RPLY_COMPREPLAY) - return (error); - good_pw2 = FALSE; - } else { - /* generate checksum for pre-write entry */ - dsum2 = genchecksum(rwbuf2->rpl_data + DEV_BSIZE, - dbtob(pwhp2->rpw_blkcnt)); - good_pw2 = (dsum2 == pwhp2->rpw_sum); - } - - /* verify the checksums and states */ - raid_rply_verify(un, eachp->rpl_column1, good_pw1, &do_1, - eachp->rpl_column2, good_pw2, &do_2); - - /* write (replay) the pre-write entries */ - if (do_1) { - error = raid_pw_write(un, eachp->rpl_column1, - &eachp->rpl_pwhdr1, rwbuf1); - if (error && (error != RAID_RPLY_COMPREPLAY)) { - return (error); - } - } - if (do_2) { - error = raid_pw_write(un, eachp->rpl_column2, - &eachp->rpl_pwhdr2, rwbuf2); - if (error && (error != RAID_RPLY_COMPREPLAY)) { - return (error); - } - } - return (0); - } - if (eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC) { - /* - * if partner was errored at time of write - * or due to open or replay, replay this entry - */ - if ((eachp->rpl_pwhdr1.rpw_columnnum == -1) || - (! COLUMN_ISUP(un, eachp->rpl_pwhdr1.rpw_columnnum))) { - /* read the pre-write entry */ - error = raid_pw_read(un, eachp->rpl_column1, - eachp->rpl_slot1, rwbuf1); - if (error) - return (error); - /* generate checksum for the pre-write entry */ - pwhp1 = &eachp->rpl_pwhdr1; - dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE, - dbtob(pwhp1->rpw_blkcnt)); - if (dsum1 == pwhp1->rpw_sum) { - error = raid_pw_write(un, eachp->rpl_column1, - &eachp->rpl_pwhdr1, rwbuf1); - if (error && (error != RAID_RPLY_COMPREPLAY)) { - return (error); - } - } - } - return (0); - } - - return (0); -} - -static int -replay_line(mr_unit_t *un, raid_rplylst_t *eachp, raid_rplybuf_t *rplybuf) -{ - raid_pwhdr_t *pwhdr1, *pwhdr2; - raid_rplylst_t *eachpn; - int i; - int cnt; - diskaddr_t blkno; - uint_t blkcnt; - long long id; - int dsum; - int error; - int colcnt, col, col2; - int down; - - if (eachp->rpl_id == 0) - return (0); - /* - * check: 1 - enough equal ids - * 2 - all have same columncnt - * 3 - all have same blkno - * 4 - all have same blkcnt - * - * read each and check the checksum - * write each - */ - - cnt = eachp->rpl_colcnt; - id = eachp->rpl_id; - pwhdr1 = &eachp->rpl_pwhdr1; - blkno = pwhdr1->rpw_blkno; - blkcnt = pwhdr1->rpw_blkcnt; - - error = raid_pw_read(un, eachp->rpl_column1, eachp->rpl_slot1, rplybuf); - dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE, - dbtob(pwhdr1->rpw_blkcnt)); - - if (dsum != pwhdr1->rpw_sum) - return (0); - - if (error) { - if (error == RAID_RPLY_COMPREPLAY) - return (0); - else - return (1); - } - - eachpn = eachp->rpl_next; - for (i = 1; i < cnt; i++) { - if (eachpn == NULL) - break; - col2 = eachpn->rpl_column1; - ASSERT(col2 < un->un_totalcolumncnt); - pwhdr2 = &eachpn->rpl_pwhdr1; - if ((pwhdr2->rpw_blkno != blkno) || - (pwhdr2->rpw_blkcnt != blkcnt) || - (eachpn->rpl_id != id) || - (pwhdr2->rpw_colcount != cnt)) { - return (0); - } - - error = raid_pw_read(un, col2, eachpn->rpl_slot1, rplybuf); - dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE, - dbtob(pwhdr2->rpw_blkcnt)); - if (dsum != pwhdr2->rpw_sum) - return (0); - eachpn = eachpn->rpl_next; - } - colcnt = i; - - if (error) - return (0); - - down = raid_state_cnt(un, RCS_ERRED); - if ((i != un->un_totalcolumncnt) && - (i != (un->un_totalcolumncnt - down))) - return (0); - - /* there ara enough columns to write correctly */ - eachpn = eachp; - for (i = 0; i < colcnt; i++) { - col = eachpn->rpl_column1; - error = raid_pw_read(un, col, eachpn->rpl_slot1, rplybuf); - error = raid_pw_write(un, col, &eachpn->rpl_pwhdr1, rplybuf); - eachpn->rpl_id = 0; - if (error && (error != RAID_RPLY_COMPREPLAY)) - return (1); - eachpn = eachpn->rpl_next; - } - return (0); -} - -/* - * NAMES: raid_replay_error - * DESCRIPTION: RAID metadevice replay error handling routine (TBD) - * PARAMETERS: - * RETURNS: - */ -static int -raid_replay_error(mr_unit_t *un, int column) -{ - int error = RAID_RPLY_COMPREPLAY; - - raid_set_state(un, column, RCS_ERRED, 0); - raid_commit(un, NULL); - - if (UNIT_STATE(un) == RUS_LAST_ERRED) { - error = RAID_RPLY_READONLY; - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } else if (UNIT_STATE(un) == RUS_ERRED) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - return (error); -} - -/* - * NAMES: raid_replay - * DESCRIPTION: RAID metadevice main replay processing routine - * PARAMETERS: mr_unit_t *un - pointer to an unit structure - * RETURNS: - */ - -int -raid_replay(mr_unit_t *un) -{ - raid_rplylst_t *rplylst = NULL; - raid_rplylst_t **prevp, *eachp; - raid_rplybuf_t *rplybuf; - raid_rplybuf_t rwbuf1; - raid_rplybuf_t rwbuf2; - mr_column_t *colptr; - raid_pwhdr_t pwhdr; - raid_pwhdr_t *pwhdrp = &pwhdr; - int error = 0; - int i, j; - diskaddr_t max_blkno = un->un_segsize * un->un_segsincolumn; - int totalcolumns = un->un_totalcolumncnt; - - raid_rply_alloc(un, &rplybuf, &rwbuf1, &rwbuf2); - - /* build a replay list based on the order of pre-write id */ - for (i = 0; i < un->un_pwcnt; i++) { - /* issue a synchronous read for each column */ - for (j = 0; j < un->un_totalcolumncnt; j++) { - if (COLUMN_ISUP(un, j)) { - raid_pwhdr_read(un, i, j, &rplybuf[j]); - /* wait for I/O completion for each column */ - if (biowait((buf_t *)rplybuf[j].rpl_buf)) { - /* potential state transition */ - error = raid_replay_error(un, j); - if (error == RAID_RPLY_COMPREPLAY) - continue; - else - goto replay_failed; - } - if (un->c.un_revision & MD_64BIT_META_DEV) { - pwhdrp = (raid_pwhdr_t *) - rplybuf[j].rpl_data; - } else { - RAID_CONVERT_RPW((raid_pwhdr32_od_t *) - rplybuf[j].rpl_data, - pwhdrp); - } - - /* first check pre-write magic number */ - if (pwhdrp->rpw_magic != RAID_PWMAGIC) { - continue; - } - if (pwhdrp->rpw_column != j) { - continue; - } - if (pwhdrp->rpw_id == (long long) 0) { - continue; - } - if (pwhdrp->rpw_blkcnt > (un->un_iosize - 1)) { - continue; - } - if (pwhdrp->rpw_blkcnt == 0) { - continue; - } - if (pwhdrp->rpw_blkno > max_blkno) { - continue; - } - if ((pwhdrp->rpw_columnnum < 0) || - (pwhdrp->rpw_columnnum > totalcolumns)) { - continue; - } - if (((pwhdrp->rpw_colcount != 1) && - (pwhdrp->rpw_colcount != 2) && - (pwhdrp->rpw_colcount != totalcolumns))) { - continue; - } - - enq_rplylst(&rplylst, pwhdrp, i, j); - } - } - } - - /* replay each entry in the replay list */ - prevp = &rplylst; - while ((eachp = *prevp) != NULL) { - /* zero out the pre-write headers in the buffer */ - bzero((caddr_t)rwbuf1.rpl_data, sizeof (raid_pwhdr_t)); - bzero((caddr_t)rwbuf2.rpl_data, sizeof (raid_pwhdr_t)); - - if (eachp->rpl_colcnt <= 2) - error = raid_rplyeach(un, eachp, &rwbuf1, &rwbuf2); - else - error = replay_line(un, eachp, &rwbuf1); - - if (error && (error != RAID_RPLY_COMPREPLAY)) { - goto replay_failed; - } - - /* free the processed replay list entry */ - rpl_delete(prevp, eachp); - prevp = &rplylst; - } - - /* zero out all pre-write entries in this unit */ - for (j = 0; j < un->un_totalcolumncnt; j++) { - if (COLUMN_ISUP(un, j)) { - colptr = &un->un_column[j]; - if (init_pw_area(un, colptr->un_dev, - colptr->un_pwstart, j)) - break; - } - } - - /* deallocate all the buffer resource allocated in this routine */ - raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2); - - return (RAID_RPLY_SUCCESS); - -replay_failed: - - /* first release the list */ - prevp = &rplylst; - while ((eachp = *prevp) != NULL) { - rpl_delete(prevp, eachp); - prevp = &rplylst; - } - - /* then release buffers */ - raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2); - - /* also reset the pre-write id variable to one */ - un->un_pwid = 1; - raid_total_rply_entries = 0; - - return (error); -} diff --git a/usr/src/uts/common/io/lvm/raid/raid_resync.c b/usr/src/uts/common/io/lvm/raid/raid_resync.c deleted file mode 100644 index dd2a5184369c..000000000000 --- a/usr/src/uts/common/io/lvm/raid/raid_resync.c +++ /dev/null @@ -1,1078 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * NAME: raid_resync.c - * DESCRIPTION: RAID driver source file containing routines related to resync - * operation. - * ROUTINES PROVIDED FOR EXTERNAL USE: - * resync_request() - get resync lock if available - * release_resync_request() - relinquish resync lock - * erred_check_line() - provide write instruction for erred column - * init_pw_area() - initialize pre-write area - * copy_pw_area() - copy pre-write area from one device to another - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define NOCOLUMN (-1) - -extern md_set_t md_set[]; -extern kmem_cache_t *raid_child_cache; -extern kmem_cache_t *raid_parent_cache; -extern md_resync_t md_cpr_resync; -extern major_t md_major; -extern void raid_parent_init(md_raidps_t *ps); -extern void raid_child_init(md_raidcs_t *ps); - -/* - * NAMES: xor - * DESCRIPTION: Xor two chunks of data together. The data referenced by - * addr1 and addr2 are xor'd together for size and written into - * addr1. - * PARAMETERS: caddr_t addr1 - address of first chunk of data and destination - * caddr_t addr2 - address of second chunk of data - * u_int size - number to xor - */ -static void -xor(caddr_t addr1, caddr_t addr2, size_t size) -{ - while (size--) { - *addr1++ ^= *addr2++; - } -} - -/* - * NAME: release_resync_request - * - * DESCRIPTION: Release resync active flag and reset unit values accordingly. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * - * LOCKS: Expects Unit Writer Lock to be held across call. - */ -void -release_resync_request( - minor_t mnum -) -{ - mr_unit_t *un; - - un = MD_UNIT(mnum); - ASSERT(un != NULL); - - un->c.un_status &= ~MD_UN_RESYNC_ACTIVE; - - un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC; - un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC_ERRED; - un->un_column[un->un_resync_index].un_devflags &= - ~(MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC); - - un->un_resync_line_index = 0; - un->un_resync_index = NOCOLUMN; -} - -/* - * NAME: resync_request - * - * DESCRIPTION: Request resync. If resync is available (no current active - * resync), mark unit as resync active and initialize. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * int column_index - index of column to resync - * int copysize - copysize of ioctl request - * md_error_t *ep - error output parameter - * - * RETURN: 0 if resync is available, 1 otherwise. - * - * LOCKS: Expects Unit Writer Lock to be held across call. - * - * NOTE: Sets un_resync_copysize to the input value in copysize, the - * existing value from an incomplete previous resync with an - * input value in copysize, or the lesser of the unit segment - * size or maxio. - */ -/* ARGSUSED */ -int -resync_request( - minor_t mnum, - int column_index, - size_t copysize, - md_error_t *mde -) -{ - mr_unit_t *un; - - un = MD_UNIT(mnum); - ASSERT(un != NULL); - - /* if resync or grow not already active, set resync active for unit */ - if (! (un->un_column[column_index].un_devflags & MD_RAID_RESYNC) && - ((un->c.un_status & MD_UN_RESYNC_ACTIVE) || - (un->c.un_status & MD_UN_GROW_PENDING) || - (un->un_column[column_index].un_devstate & RCS_RESYNC))) { - if (mde) - return (mdmderror(mde, MDE_GROW_DELAYED, mnum)); - return (1); - } - - if (un->un_column[column_index].un_devstate & - (RCS_ERRED | RCS_LAST_ERRED)) - un->un_column[column_index].un_devflags |= MD_RAID_DEV_ERRED; - else - un->un_column[column_index].un_devflags &= ~MD_RAID_DEV_ERRED; - un->c.un_status |= MD_UN_RESYNC_ACTIVE; - un->un_resync_index = column_index; - un->un_resync_line_index = 0; - raid_set_state(un, column_index, RCS_RESYNC, 0); - - return (0); -} - -/* - * Name: alloc_bufs - * - * DESCRIPTION: Initialize resync_comp buffers. - * - * PARAMETERS: size_t bsize - size of buffer - * buf_t *read_buf1 - first read buf - * buf_t *read_buf2 - second read buf - * buf_t *write_buf - write buf - */ -static void -alloc_bufs(md_raidcs_t *cs, size_t bsize) -{ - /* allocate buffers, write uses the read_buf1 buffer */ - cs->cs_dbuffer = kmem_zalloc(bsize, KM_SLEEP); - cs->cs_pbuffer = kmem_zalloc(bsize, KM_SLEEP); -} - -void -init_buf(buf_t *bp, int flags, size_t size) -{ - /* zero buf */ - bzero((caddr_t)bp, sizeof (buf_t)); - - /* set b_back and b_forw to point back to buf */ - bp->b_back = bp; - bp->b_forw = bp; - - /* set flags size */ - bp->b_flags = flags; - bp->b_bufsize = size; - bp->b_offset = -1; - - /* setup semaphores */ - sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); - sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL); -} - -void -destroy_buf(buf_t *bp) -{ - sema_destroy(&bp->b_io); - sema_destroy(&bp->b_sem); -} - -void -reset_buf(buf_t *bp, int flags, size_t size) -{ - destroy_buf(bp); - init_buf(bp, flags, size); -} - -/* - * NAME: free_bufs - * - * DESCRIPTION: Free up buffers. - * - * PARAMETERS: size_t bsize - size of buffer - * buf_t *read_buf1 - first read buf - * buf_t *read_buf2 - second read buf - * buf_t *write_buf - write buf - */ -static void -free_bufs(size_t bsize, md_raidcs_t *cs) -{ - kmem_free(cs->cs_dbuffer, bsize); - kmem_free(cs->cs_pbuffer, bsize); -} - -/* - * NAME: init_pw_area - * - * DESCRIPTION: Initialize pre-write area to all zeros. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * md_dev64_t dev_to_write - index of column to resync - * int column_index - index of column to resync - * - * RETURN: 1 if write error on resync device, otherwise 0 - * - * LOCKS: Expects Unit Reader Lock to be held across call. - */ -int -init_pw_area( - mr_unit_t *un, - md_dev64_t dev_to_write, - diskaddr_t pwstart, - uint_t col -) -{ - buf_t buf; - caddr_t databuffer; - size_t copysize; - size_t bsize; - int error = 0; - int i; - - ASSERT(un != NULL); - ASSERT(un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN); - - bsize = un->un_iosize; - copysize = dbtob(bsize); - databuffer = kmem_zalloc(copysize, KM_SLEEP); - init_buf(&buf, (B_BUSY | B_WRITE), copysize); - - for (i = 0; i < un->un_pwcnt; i++) { - /* magic field is 0 for 4.0 compatability */ - RAID_FILLIN_RPW(databuffer, un, 0, 0, - 0, 0, 0, - 0, col, 0); - buf.b_un.b_addr = (caddr_t)databuffer; - buf.b_edev = md_dev64_to_dev(dev_to_write); - buf.b_bcount = dbtob(bsize); - buf.b_lblkno = pwstart + (i * un->un_iosize); - - /* write buf */ - (void) md_call_strategy(&buf, MD_STR_NOTTOP, NULL); - - if (biowait(&buf)) { - error = 1; - break; - } - reset_buf(&buf, (B_BUSY | B_WRITE), copysize); - } /* for */ - - destroy_buf(&buf); - kmem_free(databuffer, copysize); - - return (error); -} - -/* - * NAME: raid_open_alt - * - * DESCRIPTION: opens the alt device used during resync. - * - * PARAMETERS: un - * - * RETURN: 0 - successfull - * 1 - failed - * - * LOCKS: requires unit writer lock - */ - -static int -raid_open_alt(mr_unit_t *un, int index) -{ - mr_column_t *column = &un->un_column[index]; - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - md_dev64_t tmpdev = column->un_alt_dev; - - /* correct locks */ - ASSERT(UNIT_WRITER_HELD(un)); - /* not already writing to */ - ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT)); - /* not already open */ - ASSERT(! (column->un_devflags & MD_RAID_ALT_ISOPEN)); - - if (tmpdev != NODEV64) { - /* - * Open by device id. We use orig_key since alt_dev - * has been set by the caller to be the same as orig_dev. - */ - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, column->un_orig_key) == 1) { - tmpdev = md_resolve_bydevid(MD_SID(un), tmpdev, - column->un_orig_key); - } - if (md_layered_open(MD_SID(un), &tmpdev, MD_OFLG_NULL)) { - /* failed open */ - column->un_alt_dev = tmpdev; - return (1); - } else { - /* open suceeded */ - column->un_alt_dev = tmpdev; - column->un_devflags |= MD_RAID_ALT_ISOPEN; - return (0); - } - } else - /* no alt device to open */ - return (1); -} - - -/* - * NAME: raid_close_alt - * - * DESCRIPTION: closes the alt device used during resync. - * - * PARAMETERS: un - raid unit structure - * indes - raid column - * - * RETURN: none - * - * LOCKS: requires unit writer lock - */ - -static void -raid_close_alt(mr_unit_t *un, int index) -{ - mr_column_t *column = &un->un_column[index]; - md_dev64_t tmpdev = column->un_alt_dev; - - ASSERT(UNIT_WRITER_HELD(un)); /* correct locks */ - ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT)); /* not writing */ - ASSERT(column->un_devflags & MD_RAID_ALT_ISOPEN); /* already open */ - ASSERT(tmpdev != NODEV64); /* is a device */ - - md_layered_close(column->un_alt_dev, MD_OFLG_NULL); - column->un_devflags &= ~MD_RAID_ALT_ISOPEN; - column->un_alt_dev = NODEV64; -} - -static diskaddr_t -raid_resync_fillin_cs(diskaddr_t line, uint_t line_count, md_raidcs_t *cs) -{ - mr_unit_t *un = cs->cs_un; - - ASSERT(line < un->un_segsincolumn); - - cs->cs_line = line; - cs->cs_blkno = line * un->un_segsize; - cs->cs_blkcnt = un->un_segsize * line_count; - cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; - raid_line_reader_lock(cs, 1); - - return (line + line_count); -} - -/* states returned by raid_resync_line */ - -#define RAID_RESYNC_OKAY 0 -#define RAID_RESYNC_RDERROR 2 -#define RAID_RESYNC_WRERROR 3 -#define RAID_RESYNC_STATE 4 - -int -raid_resync_region( - md_raidcs_t *cs, - diskaddr_t line, - uint_t line_count, - int *single_read, - hs_cmds_t *hs_state, - int *err_col, - md_dev64_t dev_to_write, - diskaddr_t write_dev_start) -{ - mr_unit_t *un = cs->cs_un; - buf_t *readb1 = &cs->cs_pbuf; - buf_t *readb2 = &cs->cs_dbuf; - buf_t *writeb = &cs->cs_hbuf; - diskaddr_t off; - size_t tcopysize; - size_t copysize; - int resync; - int quit = 0; - size_t leftinseg; - int i; - - resync = un->un_resync_index; - off = line * un->un_segsize; - copysize = un->un_resync_copysize; - - /* find first column to read, skip resync column */ - - leftinseg = un->un_segsize * line_count; - while (leftinseg) { - - /* truncate last chunk to end if needed */ - if (copysize > leftinseg) - tcopysize = leftinseg; - else - tcopysize = copysize; - leftinseg -= tcopysize; - - /* - * One of two scenarios: - * 1) resync device with hotspare ok. This implies that - * we are copying from a good hotspare to a new good original - * device. In this case readb1 is used as the buf for - * the read from the hotspare device. - * 2) For all other cases, including when in case 1) and an - * error is detected on the (formerly good) hotspare device, - * readb1 is used for the initial read. readb2 is used for - * all other reads. Each readb2 buffer is xor'd into the - * readb1 buffer. - * - * In both cases, writeb is used for the write, using readb1's - * buffer. - * - * For case 2, we could alternatively perform the read for all - * devices concurrently to improve performance. However, - * this could diminish performance for concurrent reads and - * writes if low on memory. - */ - - /* read first buffer */ - - /* switch to read from good columns if single_read */ - if (*single_read) { - if (un->un_column[resync].un_dev == NODEV64) - return (RAID_RESYNC_RDERROR); - - reset_buf(readb1, B_READ | B_BUSY, - dbtob(copysize)); - readb1->b_bcount = dbtob(tcopysize); - readb1->b_un.b_addr = cs->cs_pbuffer; - readb1->b_edev = md_dev64_to_dev( - un->un_column[resync].un_dev); - readb1->b_lblkno = - un->un_column[resync].un_devstart + off; - (void) md_call_strategy(readb1, MD_STR_NOTTOP, NULL); - if (biowait(readb1)) { - /* - * at this point just start rebuilding the - * data and go on since the other column - * are ok. - */ - *single_read = 0; - *hs_state = HS_BAD; - un->un_column[resync].un_devflags &= - ~MD_RAID_COPY_RESYNC; - un->un_column[resync].un_devflags |= - MD_RAID_REGEN_RESYNC; - } - } - - /* if reading from all non-resync columns */ - if (!*single_read) { - /* for each column, read line and xor into write buf */ - bzero(cs->cs_pbuffer, dbtob(tcopysize)); - for (i = 0; i < un->un_totalcolumncnt; i++) { - - if (un->un_column[i].un_dev == NODEV64) - return (RAID_RESYNC_RDERROR); - - /* skip column getting resync'ed */ - if (i == resync) { - continue; - } - reset_buf(readb1, B_READ | B_BUSY, - dbtob(copysize)); - readb1->b_bcount = dbtob(tcopysize); - readb1->b_un.b_addr = cs->cs_dbuffer; - readb1->b_edev = md_dev64_to_dev( - un->un_column[i].un_dev); - readb1->b_lblkno = - un->un_column[i].un_devstart + off; - - (void) md_call_strategy(readb1, MD_STR_NOTTOP, - NULL); - if (biowait(readb1)) { - *err_col = i; - quit = RAID_RESYNC_RDERROR; - } - - if (quit) - return (quit); - - /* xor readb2 data into readb1 */ - xor(cs->cs_pbuffer, readb1->b_un.b_addr, - dbtob(tcopysize)); - } /* for */ - } - - reset_buf(writeb, B_WRITE | B_BUSY, - dbtob(copysize)); - writeb->b_bcount = dbtob(tcopysize); - writeb->b_un.b_addr = cs->cs_pbuffer; - writeb->b_lblkno = off + write_dev_start; - writeb->b_edev = md_dev64_to_dev(dev_to_write); - - /* set write block number and perform the write */ - (void) md_call_strategy(writeb, MD_STR_NOTTOP, NULL); - if (biowait(writeb)) { - if (*single_read == 0) { - *hs_state = HS_BAD; - } - return (RAID_RESYNC_WRERROR); - } - writeb->b_blkno += tcopysize; - off += tcopysize; - } /* while */ - sema_destroy(&readb1->b_io); - sema_destroy(&readb1->b_sem); - sema_destroy(&readb2->b_io); - sema_destroy(&readb2->b_sem); - sema_destroy(&writeb->b_io); - sema_destroy(&writeb->b_sem); - return (RAID_RESYNC_OKAY); -} - -/* - * NAME: resync_comp - * - * DESCRIPTION: Resync the component. Iterate through the raid unit a line at - * a time, read from the good device(s) and write the resync - * device. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * md_raidcs_t *cs - child save struct - * - * RETURN: 0 - successfull - * 1 - failed - * -1 - aborted - * - * LOCKS: Expects Unit Reader Lock to be held across call. Acquires and - * releases Line Reader Lock for per-line I/O. - */ -static void -resync_comp( - minor_t mnum, - md_raidcs_t *cs -) -{ - mdi_unit_t *ui; - mr_unit_t *un; - mddb_recid_t recids[2]; - rcs_state_t state; - md_dev64_t dev_to_write; - diskaddr_t write_pwstart; - diskaddr_t write_devstart; - md_dev64_t dev; - int resync; - int i; - int single_read = 0; - int err; - int err_cnt; - int last_err; - diskaddr_t line; - diskaddr_t segsincolumn; - size_t bsize; - uint_t line_count; - - /* - * hs_state is the state of the hotspare on the column being resynced - * dev_state is the state of the resync target - */ - hs_cmds_t hs_state; - int err_col = -1; - diskaddr_t resync_end_pos; - - ui = MDI_UNIT(mnum); - ASSERT(ui != NULL); - - un = cs->cs_un; - - md_unit_readerexit(ui); - un = (mr_unit_t *)md_io_writerlock(ui); - un = (mr_unit_t *)md_unit_writerlock(ui); - resync = un->un_resync_index; - state = un->un_column[resync].un_devstate; - line_count = un->un_maxio / un->un_segsize; - if (line_count == 0) { /* handle the case of segsize > maxio */ - line_count = 1; - bsize = un->un_maxio; - } else - bsize = line_count * un->un_segsize; - - un->un_resync_copysize = (uint_t)bsize; - - ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE); - ASSERT(un->un_column[resync].un_devflags & - (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC)); - - /* - * if the column is not in resync then just bail out. - */ - if (! (un->un_column[resync].un_devstate & RCS_RESYNC)) { - md_unit_writerexit(ui); - md_io_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - return; - } - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - /* identify device to write and its start block */ - - if (un->un_column[resync].un_alt_dev != NODEV64) { - if (raid_open_alt(un, resync)) { - raid_set_state(un, resync, state, 0); - md_unit_writerexit(ui); - md_io_writerexit(ui); - un = (mr_unit_t *)md_unit_readerlock(ui); - cmn_err(CE_WARN, "md: %s: %s open failed replace " - "terminated", md_shortname(MD_SID(un)), - md_devname(MD_UN2SET(un), - un->un_column[resync].un_alt_dev, - NULL, 0)); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - return; - } - ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC); - dev_to_write = un->un_column[resync].un_alt_dev; - write_devstart = un->un_column[resync].un_alt_devstart; - write_pwstart = un->un_column[resync].un_alt_pwstart; - if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) { - single_read = 0; - hs_state = HS_BAD; - } else { - hs_state = HS_FREE; - single_read = 1; - } - un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT; - } else { - dev_to_write = un->un_column[resync].un_dev; - write_devstart = un->un_column[resync].un_devstart; - write_pwstart = un->un_column[resync].un_pwstart; - single_read = 0; - hs_state = HS_FREE; - ASSERT(un->un_column[resync].un_devflags & - MD_RAID_REGEN_RESYNC); - } - - alloc_bufs(cs, dbtob(bsize)); - /* initialize pre-write area */ - if (init_pw_area(un, dev_to_write, write_pwstart, resync)) { - un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT; - if (un->un_column[resync].un_alt_dev != NODEV64) { - raid_close_alt(un, resync); - } - md_unit_writerexit(ui); - md_io_writerexit(ui); - if (dev_to_write == un->un_column[resync].un_dev) - hs_state = HS_BAD; - err = RAID_RESYNC_WRERROR; - goto resync_comp_error; - } - - un->c.un_status &= ~MD_UN_RESYNC_CANCEL; - segsincolumn = un->un_segsincolumn; - err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED); - - /* commit the record */ - - md_unit_writerexit(ui); - md_io_writerexit(ui); - - - /* resync each line of the unit */ - for (line = 0; line < segsincolumn; line += line_count) { - /* - * Update address range in child struct and lock the line. - * - * The reader version of the line lock is used since only - * resync will use data beyond un_resync_line_index on the - * resync device. - */ - un = (mr_unit_t *)md_io_readerlock(ui); - if (line + line_count > segsincolumn) - line_count = segsincolumn - line; - resync_end_pos = raid_resync_fillin_cs(line, line_count, cs); - (void) md_unit_readerlock(ui); - ASSERT(un->un_resync_line_index == resync_end_pos); - err = raid_resync_region(cs, line, (int)line_count, - &single_read, &hs_state, &err_col, dev_to_write, - write_devstart); - - /* - * if the column failed to resync then stop writing directly - * to the column. - */ - if (err) - un->un_resync_line_index = 0; - - md_unit_readerexit(ui); - raid_line_exit(cs); - md_io_readerexit(ui); - - if (err) - break; - - un = (mr_unit_t *)md_unit_writerlock(ui); - - if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) { - err = RAID_RESYNC_STATE; - md_unit_writerexit(ui); - break; - } - md_unit_writerexit(ui); - } /* for */ - -resync_comp_error: - un = (mr_unit_t *)md_io_writerlock(ui); - (void) md_unit_writerlock(ui); - un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT; - - recids[0] = 0; - recids[1] = 0; - switch (err) { - /* - * successful resync - */ - case RAID_RESYNC_OKAY: - /* initialize pre-write area */ - if ((un->un_column[resync].un_orig_dev != NODEV64) && - (un->un_column[resync].un_orig_dev == - un->un_column[resync].un_alt_dev)) { - /* - * replacing a hot spare - * release the hot spare, which will close the hotspare - * and mark it closed. - */ - raid_hs_release(hs_state, un, &recids[0], resync); - /* - * make the resync target the main device and - * mark open - */ - un->un_column[resync].un_hs_id = 0; - un->un_column[resync].un_dev = - un->un_column[resync].un_orig_dev; - un->un_column[resync].un_devstart = - un->un_column[resync].un_orig_devstart; - un->un_column[resync].un_pwstart = - un->un_column[resync].un_orig_pwstart; - un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN; - /* alt becomes the device so don't close it */ - un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT; - un->un_column[resync].un_devflags &= - ~MD_RAID_ALT_ISOPEN; - un->un_column[resync].un_alt_dev = NODEV64; - } - raid_set_state(un, resync, RCS_OKAY, 0); - break; - - case RAID_RESYNC_WRERROR: - if (HOTSPARED(un, resync) && single_read && - (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) { - /* - * this is the case where the resync target is - * bad but there is a good hotspare. In this - * case keep the hotspare, and go back to okay. - */ - raid_set_state(un, resync, RCS_OKAY, 0); - cmn_err(CE_WARN, "md: %s: %s write error, replace " - "terminated", md_shortname(MD_SID(un)), - md_devname(MD_UN2SET(un), - un->un_column[resync].un_orig_dev, - NULL, 0)); - break; - } - if (HOTSPARED(un, resync)) { - raid_hs_release(hs_state, un, &recids[0], resync); - un->un_column[resync].un_dev = - un->un_column[resync].un_orig_dev; - un->un_column[resync].un_devstart = - un->un_column[resync].un_orig_devstart; - un->un_column[resync].un_pwstart = - un->un_column[resync].un_orig_pwstart; - } - raid_set_state(un, resync, RCS_ERRED, 0); - if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC) - dev = un->un_column[resync].un_dev; - else - dev = un->un_column[resync].un_alt_dev; - cmn_err(CE_WARN, "md: %s: %s write error replace terminated", - md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev, - NULL, 0)); - break; - - case RAID_RESYNC_STATE: - if (HOTSPARED(un, resync) && single_read && - (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) { - /* - * this is the case where the resync target is - * bad but there is a good hotspare. In this - * case keep the hotspare, and go back to okay. - */ - raid_set_state(un, resync, RCS_OKAY, 0); - cmn_err(CE_WARN, "md: %s: needs maintenance, replace " - "terminated", md_shortname(MD_SID(un))); - break; - } - if (HOTSPARED(un, resync)) { - raid_hs_release(hs_state, un, &recids[0], resync); - un->un_column[resync].un_dev = - un->un_column[resync].un_orig_dev; - un->un_column[resync].un_devstart = - un->un_column[resync].un_orig_devstart; - un->un_column[resync].un_pwstart = - un->un_column[resync].un_orig_pwstart; - } - break; - case RAID_RESYNC_RDERROR: - if (HOTSPARED(un, resync)) { - raid_hs_release(hs_state, un, &recids[0], resync); - un->un_column[resync].un_dev = - un->un_column[resync].un_orig_dev; - un->un_column[resync].un_devstart = - un->un_column[resync].un_orig_devstart; - un->un_column[resync].un_pwstart = - un->un_column[resync].un_orig_pwstart; - } - - if ((resync != err_col) && (err_col != NOCOLUMN)) - raid_set_state(un, err_col, RCS_ERRED, 0); - break; - - default: - ASSERT(0); - } - if (un->un_column[resync].un_alt_dev != NODEV64) { - raid_close_alt(un, resync); - } - - /* - * an io operation may have gotten an error and placed a - * column in erred state. This will abort the resync, which - * will end up in last erred. This is ugly so go through - * the columns and do cleanup - */ - err_cnt = 0; - last_err = 0; - for (i = 0; i < un->un_totalcolumncnt; i++) { - if (un->un_column[i].un_devstate & RCS_OKAY) - continue; - if (i == resync) { - raid_set_state(un, i, RCS_ERRED, 1); - err_cnt++; - } else if (err == RAID_RESYNC_OKAY) { - err_cnt++; - } else { - raid_set_state(un, i, RCS_LAST_ERRED, 1); - last_err++; - } - } - if ((err_cnt == 0) && (last_err == 0)) - un->un_state = RUS_OKAY; - else if (last_err == 0) { - un->un_state = RUS_ERRED; - ASSERT(err_cnt == 1); - } else if (last_err > 0) { - un->un_state = RUS_LAST_ERRED; - } - - uniqtime32(&un->un_column[resync].un_devtimestamp); - un->un_resync_copysize = 0; - un->un_column[resync].un_devflags &= - ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC); - raid_commit(un, recids); - /* release unit writer lock and acquire unit reader lock */ - md_unit_writerexit(ui); - md_io_writerexit(ui); - (void) md_unit_readerlock(ui); - if (err == RAID_RESYNC_OKAY) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - if (raid_state_cnt(un, RCS_ERRED | - RCS_LAST_ERRED) > 1) { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } else { - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, - SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); - } - } - - free_bufs(dbtob(bsize), cs); -} - -/* - * NAME: resync_unit - * - * DESCRIPTION: Start of RAID resync thread. Perform up front allocations, - * initializations and consistency checking, then call - * resync_comp to resync the component. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * - * LOCKS: Acquires and releases Unit Reader Lock to maintain unit - * existence during resync. - * Acquires and releases the resync count lock for cpr. - */ -static void -resync_unit( - minor_t mnum -) -{ - mdi_unit_t *ui; - mr_unit_t *un; - md_raidps_t *ps = NULL; - md_raidcs_t *cs = NULL; - int resync; - - /* - * Increment the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync++; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - ui = MDI_UNIT(mnum); - ASSERT(ui != NULL); - - un = (mr_unit_t *)md_unit_readerlock(ui); - - /* - * Allocate parent and child memory pool structures. These are - * only needed to lock raid lines, so only the minimal - * required fields for this purpose are initialized. - * - * Do not use the reserve pool for resync. - */ - ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS); - raid_parent_init(ps); - cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS); - raid_child_init(cs); - resync = un->un_resync_index; - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_flags = MD_RPS_INUSE; - cs->cs_ps = ps; - cs->cs_un = un; - - ASSERT(!(un->un_column[resync].un_devflags & MD_RAID_WRITE_ALT)); - - resync_comp(mnum, cs); - release_resync_request(mnum); - - kmem_cache_free(raid_child_cache, cs); - kmem_cache_free(raid_parent_cache, ps); - - md_unit_readerexit(ui); - - /* close raid unit */ - (void) raid_internal_close(mnum, OTYP_LYR, 0, 0); - - /* poke hot spare daemon */ - (void) raid_hotspares(); - - /* - * Decrement the raid resync count for cpr - */ - mutex_enter(&md_cpr_resync.md_resync_mutex); - md_cpr_resync.md_raid_resync--; - mutex_exit(&md_cpr_resync.md_resync_mutex); - - thread_exit(); -} - -/* - * NAME: raid_resync_unit - * - * DESCRIPTION: RAID metadevice specific resync routine. - * Open the unit and start resync_unit as a separate thread. - * - * PARAMETERS: minor_t mnum - minor number identity of metadevice - * md_error_t *ep - output error parameter - * - * RETURN: On error return 1 or set ep to nonzero, otherwise return 0. - * - * LOCKS: Acquires and releases Unit Writer Lock. - */ -int -raid_resync_unit( - minor_t mnum, - md_error_t *ep -) -{ - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - mr_unit_t *un; - - ui = MDI_UNIT(mnum); - un = MD_UNIT(mnum); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(ep, MDE_DB_STALE, mnum, setno)); - - ASSERT(un->un_column[un->un_resync_index].un_devflags & - (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC)); - - /* Don't start a resync if the device is not available */ - if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) { - return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum)); - } - - if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0)) { - (void) md_unit_writerlock(ui); - release_resync_request(mnum); - md_unit_writerexit(ui); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, - setno, MD_SID(un)); - return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum)); - } - - /* start resync_unit thread */ - (void) thread_create(NULL, 0, resync_unit, (void *)(uintptr_t)mnum, - 0, &p0, TS_RUN, minclsyspri); - - return (0); -} diff --git a/usr/src/uts/common/io/lvm/softpart/sp.c b/usr/src/uts/common/io/lvm/softpart/sp.c deleted file mode 100644 index 0ffe12a33e2c..000000000000 --- a/usr/src/uts/common/io/lvm/softpart/sp.c +++ /dev/null @@ -1,1860 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -/* - * Soft partitioning metadevice driver (md_sp). - * - * This file contains the primary operations of the soft partitioning - * metadevice driver. This includes all routines for normal operation - * (open/close/read/write). Please see mdvar.h for a definition of - * metadevice operations vector (md_ops_t). This driver is loosely - * based on the stripe driver (md_stripe). - * - * All metadevice administration is done through the use of ioctl's. - * As such, all administrative routines appear in sp_ioctl.c. - * - * Soft partitions are represented both in-core and in the metadb with a - * unit structure. The soft partition-specific information in the unit - * structure includes the following information: - * - Device information (md_dev64_t & md key) about the device on which - * the soft partition is built. - * - Soft partition status information. - * - The size of the soft partition and number of extents used to - * make up that size. - * - An array of exents which define virtual/physical offset - * mappings and lengths for each extent. - * - * Typical soft partition operation proceeds as follows: - * - The unit structure is fetched from the metadb and placed into - * an in-core array (as with other metadevices). This operation - * is performed via sp_build_incore( ) and takes place during - * "snarfing" (when all metadevices are brought in-core at - * once) and when a new soft partition is created. - * - A soft partition is opened via sp_open( ). At open time the - * the soft partition unit structure is verified with the soft - * partition on-disk structures. Additionally, the soft partition - * status is checked (only soft partitions in the OK state may be - * opened). - * - Soft partition I/O is performed via sp_strategy( ) which relies on - * a support routine, sp_mapbuf( ), to do most of the work. - * sp_mapbuf( ) maps a buffer to a particular extent via a binary - * search of the extent array in the soft partition unit structure. - * Once a translation has been performed, the I/O is passed down - * to the next layer, which may be another metadevice or a physical - * disk. Since a soft partition may contain multiple, non-contiguous - * extents, a single I/O may have to be fragmented. - * - Soft partitions are closed using sp_close. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -md_ops_t sp_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &sp_md_ops; -#endif - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern int md_status; -extern major_t md_major; -extern mdq_anchor_t md_done_daemon; -extern mdq_anchor_t md_sp_daemon; -extern kmutex_t md_mx; -extern kcondvar_t md_cv; -extern md_krwlock_t md_unit_array_rw; -extern clock_t md_hz; - -static kmem_cache_t *sp_parent_cache = NULL; -static kmem_cache_t *sp_child_cache = NULL; -static void sp_send_stat_ok(mp_unit_t *); -static void sp_send_stat_err(mp_unit_t *); - -/* - * FUNCTION: sp_parent_constructor() - * INPUT: none. - * OUTPUT: ps - parent save structure initialized. - * RETURNS: void * - ptr to initialized parent save structure. - * PURPOSE: initialize parent save structure. - */ -/*ARGSUSED1*/ -static int -sp_parent_constructor(void *p, void *d1, int d2) -{ - mutex_init(&((md_spps_t *)p)->ps_mx, - NULL, MUTEX_DEFAULT, NULL); - return (0); -} - -static void -sp_parent_init(md_spps_t *ps) -{ - bzero(ps, offsetof(md_spps_t, ps_mx)); -} - -/*ARGSUSED1*/ -static void -sp_parent_destructor(void *p, void *d) -{ - mutex_destroy(&((md_spps_t *)p)->ps_mx); -} - -/* - * FUNCTION: sp_child_constructor() - * INPUT: none. - * OUTPUT: cs - child save structure initialized. - * RETURNS: void * - ptr to initialized child save structure. - * PURPOSE: initialize child save structure. - */ -/*ARGSUSED1*/ -static int -sp_child_constructor(void *p, void *d1, int d2) -{ - bioinit(&((md_spcs_t *)p)->cs_buf); - return (0); -} - -static void -sp_child_init(md_spcs_t *cs) -{ - cs->cs_mdunit = 0; - cs->cs_ps = NULL; - md_bioreset(&cs->cs_buf); -} - -/*ARGSUSED1*/ -static void -sp_child_destructor(void *p, void *d) -{ - biofini(&((md_spcs_t *)p)->cs_buf); -} - -/* - * FUNCTION: sp_run_queue() - * INPUT: none. - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: run the md_daemon to clean up memory pool. - */ -/*ARGSUSED*/ -static void -sp_run_queue(void *d) -{ - if (!(md_status & MD_GBL_DAEMONS_LIVE)) - md_daemon(1, &md_done_daemon); -} - - -/* - * FUNCTION: sp_build_incore() - * INPUT: p - ptr to unit structure. - * snarfing - flag to tell us we are snarfing. - * OUTPUT: non. - * RETURNS: int - 0 (always). - * PURPOSE: place unit structure into in-core unit array (keyed from - * minor number). - */ -int -sp_build_incore(void *p, int snarfing) -{ - mp_unit_t *un = (mp_unit_t *)p; - minor_t mnum; - set_t setno; - md_dev64_t tmpdev; - - mnum = MD_SID(un); - - if (MD_UNIT(mnum) != NULL) - return (0); - - MD_STATUS(un) = 0; - - if (snarfing) { - /* - * if we are snarfing, we get the device information - * from the metadb record (using the metadb key for - * that device). - */ - setno = MD_MIN2SET(mnum); - - tmpdev = md_getdevnum(setno, mddb_getsidenum(setno), - un->un_key, MD_NOTRUST_DEVT); - un->un_dev = tmpdev; - } - - /* place various information in the in-core data structures */ - md_nblocks_set(mnum, un->c.un_total_blocks); - MD_UNIT(mnum) = un; - - return (0); -} - -/* - * FUNCTION: reset_sp() - * INPUT: un - unit structure to be reset/removed. - * mnum - minor number to be reset/removed. - * removing - flag to tell us if we are removing - * permanently or just reseting in-core - * structures. - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: used to either simply reset in-core structures or to - * permanently remove metadevices from the metadb. - */ -void -reset_sp(mp_unit_t *un, minor_t mnum, int removing) -{ - sv_dev_t *sv; - mddb_recid_t vtoc_id; - - /* clean up in-core structures */ - md_destroy_unit_incore(mnum, &sp_md_ops); - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - /* - * Attempt release of minor node - */ - md_remove_minor_node(mnum); - - if (!removing) - return; - - /* we are removing the soft partition from the metadb */ - - /* - * Save off device information so we can get to - * it after we do the mddb_deleterec(). - */ - sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP); - sv->setno = MD_MIN2SET(mnum); - sv->key = un->un_key; - vtoc_id = un->c.un_vtoc_id; - - /* - * Remove self from the namespace - */ - if (un->c.un_revision & MD_FN_META_DEV) { - (void) md_rem_selfname(un->c.un_self_id); - } - - /* Remove the unit structure */ - mddb_deleterec_wrapper(un->c.un_record_id); - - if (vtoc_id) - mddb_deleterec_wrapper(vtoc_id); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, - MD_MIN2SET(mnum), MD_MIN2UNIT(mnum)); - - /* - * remove the underlying device name from the metadb. if other - * soft partitions are built on this device, this will simply - * decrease the reference count for this device. otherwise the - * name record for this device will be removed from the metadb. - */ - md_rem_names(sv, 1); - kmem_free(sv, sizeof (sv_dev_t)); -} - -/* - * FUNCTION: sp_send_stat_msg - * INPUT: un - unit reference - * status - status to be sent to master node - * MD_SP_OK - soft-partition is now OK - * MD_SP_ERR " " errored - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: send a soft-partition status change to the master node. If the - * message succeeds we simply return. If it fails we panic as the - * cluster-wide view of the metadevices is now inconsistent. - * CALLING CONTEXT: - * Blockable. No locks can be held. - */ -static void -sp_send_stat_msg(mp_unit_t *un, sp_status_t status) -{ - md_mn_msg_sp_setstat_t sp_msg; - md_mn_kresult_t *kres; - set_t setno = MD_UN2SET(un); - int rval; - const char *str = (status == MD_SP_ERR) ? "MD_SP_ERR" : "MD_SP_OK"; - int nretries = 0; - - sp_msg.sp_setstat_mnum = MD_SID(un); - sp_msg.sp_setstat_status = status; - - kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); - -spss_msg: - rval = mdmn_ksend_message(setno, MD_MN_MSG_SP_SETSTAT2, MD_MSGF_NO_LOG, - 0, (char *)&sp_msg, sizeof (sp_msg), kres); - - if (!MDMN_KSEND_MSG_OK(rval, kres)) { - mdmn_ksend_show_error(rval, kres, "MD_MN_MSG_SP_SETSTAT2"); - /* If we're shutting down already, pause things here. */ - if (kres->kmmr_comm_state == MDMNE_RPC_FAIL) { - while (!md_mn_is_commd_present()) { - delay(md_hz); - } - /* - * commd is available again. Retry the message once. - * If it fails we panic as the system is in an - * unexpected state. - */ - if (nretries++ == 0) - goto spss_msg; - } - /* - * Panic as we are now in an inconsistent state. - */ - cmn_err(CE_PANIC, "md: %s: %s could not be set on all nodes\n", - md_shortname(MD_SID(un)), str); - } - - kmem_free(kres, sizeof (md_mn_kresult_t)); -} - -/* - * FUNCTION: sp_finish_error - * INPUT: ps - parent save structure for error-ed I/O. - * lock_held - set if the unit readerlock is held - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: report a driver error - */ -static void -sp_finish_error(md_spps_t *ps, int lock_held) -{ - struct buf *pb = ps->ps_bp; - mdi_unit_t *ui = ps->ps_ui; - md_dev64_t un_dev; /* underlying device */ - md_dev64_t md_dev = md_expldev(pb->b_edev); /* metadev in error */ - char *str; - - un_dev = md_expldev(ps->ps_un->un_dev); - /* set error type */ - if (pb->b_flags & B_READ) { - str = "read"; - } else { - str = "write"; - } - - - SPPS_FREE(sp_parent_cache, ps); - pb->b_flags |= B_ERROR; - - md_kstat_done(ui, pb, 0); - - if (lock_held) { - md_unit_readerexit(ui); - } - md_biodone(pb); - - cmn_err(CE_WARN, "md: %s: %s error on %s", - md_shortname(md_getminor(md_dev)), str, - md_devname(MD_DEV2SET(md_dev), un_dev, NULL, 0)); -} - - -/* - * FUNCTION: sp_xmit_ok - * INPUT: dq - daemon queue referencing failing ps structure - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: send a message to the master node in a multi-owner diskset to - * update all attached nodes view of the soft-part to be MD_SP_OK. - * CALLING CONTEXT: - * Blockable. No unit lock held. - */ -static void -sp_xmit_ok(daemon_queue_t *dq) -{ - md_spps_t *ps = (md_spps_t *)dq; - - /* Send a MD_MN_MSG_SP_SETSTAT to the master */ - sp_send_stat_msg(ps->ps_un, MD_SP_OK); - - /* - * Successfully transmitted error state to all nodes, now release this - * parent structure. - */ - SPPS_FREE(sp_parent_cache, ps); -} - -/* - * FUNCTION: sp_xmit_error - * INPUT: dq - daemon queue referencing failing ps structure - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: send a message to the master node in a multi-owner diskset to - * update all attached nodes view of the soft-part to be MD_SP_ERR. - * CALLING CONTEXT: - * Blockable. No unit lock held. - */ -static void -sp_xmit_error(daemon_queue_t *dq) -{ - md_spps_t *ps = (md_spps_t *)dq; - - /* Send a MD_MN_MSG_SP_SETSTAT to the master */ - sp_send_stat_msg(ps->ps_un, MD_SP_ERR); - - /* - * Successfully transmitted error state to all nodes, now release this - * parent structure. - */ - SPPS_FREE(sp_parent_cache, ps); -} -static void -sp_send_stat_ok(mp_unit_t *un) -{ - minor_t mnum = MD_SID(un); - md_spps_t *ps; - - ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS); - sp_parent_init(ps); - ps->ps_un = un; - ps->ps_ui = MDI_UNIT(mnum); - - daemon_request(&md_sp_daemon, sp_xmit_ok, (daemon_queue_t *)ps, - REQ_OLD); -} - -static void -sp_send_stat_err(mp_unit_t *un) -{ - minor_t mnum = MD_SID(un); - md_spps_t *ps; - - ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS); - sp_parent_init(ps); - ps->ps_un = un; - ps->ps_ui = MDI_UNIT(mnum); - - daemon_request(&md_sp_daemon, sp_xmit_error, (daemon_queue_t *)ps, - REQ_OLD); -} - - -/* - * FUNCTION: sp_error() - * INPUT: ps - parent save structure for error-ed I/O. - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: report a driver error. - * CALLING CONTEXT: - * Interrupt - non-blockable - */ -static void -sp_error(md_spps_t *ps) -{ - set_t setno = MD_UN2SET(ps->ps_un); - - /* - * Drop the mutex associated with this request before (potentially) - * enqueuing the free onto a separate thread. We have to release the - * mutex before destroying the parent structure. - */ - if (!(ps->ps_flags & MD_SPPS_DONTFREE)) { - if (MUTEX_HELD(&ps->ps_mx)) { - mutex_exit(&ps->ps_mx); - } - } else { - /* - * this should only ever happen if we are panicking, - * since DONTFREE is only set on the parent if panicstr - * is non-NULL. - */ - ASSERT(panicstr); - } - - /* - * For a multi-owner set we need to send a message to the master so that - * all nodes get the errored status when we first encounter it. To avoid - * deadlocking when multiple soft-partitions encounter an error on one - * physical unit we drop the unit readerlock before enqueueing the - * request. That way we can service any messages that require a - * writerlock to be held. Additionally, to avoid deadlocking when at - * the bottom of a metadevice stack and a higher level mirror has - * multiple requests outstanding on this soft-part, we clone the ps - * that failed and pass the error back up the stack to release the - * reference that this i/o may have in the higher-level metadevice. - * The other nodes in the cluster just have to modify the soft-part - * status and we do not need to block the i/o completion for this. - */ - if (MD_MNSET_SETNO(setno)) { - md_spps_t *err_ps; - err_ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS); - sp_parent_init(err_ps); - - err_ps->ps_un = ps->ps_un; - err_ps->ps_ui = ps->ps_ui; - - md_unit_readerexit(ps->ps_ui); - - daemon_request(&md_sp_daemon, sp_xmit_error, - (daemon_queue_t *)err_ps, REQ_OLD); - - sp_finish_error(ps, 0); - - return; - } else { - ps->ps_un->un_status = MD_SP_ERR; - } - - /* Flag the error */ - sp_finish_error(ps, 1); - -} - -/* - * FUNCTION: sp_mapbuf() - * INPUT: un - unit structure for soft partition we are doing - * I/O on. - * voff - virtual offset in soft partition to map. - * bcount - # of blocks in the I/O. - * OUTPUT: bp - translated buffer to be passed down to next layer. - * RETURNS: 1 - request must be fragmented, more work to do, - * 0 - request satisified, no more work to do - * -1 - error - * PURPOSE: Map the the virtual offset in the soft partition (passed - * in via voff) to the "physical" offset on whatever the soft - * partition is built on top of. We do this by doing a binary - * search of the extent array in the soft partition unit - * structure. Once the current extent is found, we do the - * translation, determine if the I/O will cross extent - * boundaries (if so, we have to fragment the I/O), then - * fill in the buf structure to be passed down to the next layer. - */ -static int -sp_mapbuf( - mp_unit_t *un, - sp_ext_offset_t voff, - sp_ext_length_t bcount, - buf_t *bp -) -{ - int lo, mid, hi, found, more; - size_t new_bcount; - sp_ext_offset_t new_blkno; - sp_ext_offset_t new_offset; - sp_ext_offset_t ext_endblk; - md_dev64_t new_edev; - extern unsigned md_maxphys; - - found = 0; - lo = 0; - hi = un->un_numexts - 1; - - /* - * do a binary search to find the extent that contains the - * starting offset. after this loop, mid contains the index - * of the correct extent. - */ - while (lo <= hi && !found) { - mid = (lo + hi) / 2; - /* is the starting offset contained within the mid-ext? */ - if (voff >= un->un_ext[mid].un_voff && - voff < un->un_ext[mid].un_voff + un->un_ext[mid].un_len) - found = 1; - else if (voff < un->un_ext[mid].un_voff) - hi = mid - 1; - else /* voff > un->un_ext[mid].un_voff + un->un_ext[mid].len */ - lo = mid + 1; - } - - if (!found) { - cmn_err(CE_WARN, "sp_mapbuf: invalid offset %llu.\n", voff); - return (-1); - } - - /* translate to underlying physical offset/device */ - new_offset = voff - un->un_ext[mid].un_voff; - new_blkno = un->un_ext[mid].un_poff + new_offset; - new_edev = un->un_dev; - - /* determine if we need to break the I/O into fragments */ - ext_endblk = un->un_ext[mid].un_voff + un->un_ext[mid].un_len; - if (voff + btodb(bcount) > ext_endblk) { - new_bcount = dbtob(ext_endblk - voff); - more = 1; - } else { - new_bcount = bcount; - more = 0; - } - - /* only break up the I/O if we're not built on another metadevice */ - if ((md_getmajor(new_edev) != md_major) && (new_bcount > md_maxphys)) { - new_bcount = md_maxphys; - more = 1; - } - if (bp != (buf_t *)NULL) { - /* do bp updates */ - bp->b_bcount = new_bcount; - bp->b_lblkno = new_blkno; - bp->b_edev = md_dev64_to_dev(new_edev); - } - return (more); -} - -/* - * FUNCTION: sp_validate() - * INPUT: un - unit structure to be validated. - * OUTPUT: none. - * RETURNS: 0 - soft partition ok. - * -1 - error. - * PURPOSE: called on open to sanity check the soft partition. In - * order to open a soft partition: - * - it must have at least one extent - * - the extent info in core and on disk must match - * - it may not be in an intermediate state (which would - * imply that a two-phase commit was interrupted) - * - * If the extent checking fails (B_ERROR returned from the read - * strategy call) _and_ we're a multi-owner diskset, we send a - * message to the master so that all nodes inherit the same view - * of the soft partition. - * If we are checking a soft-part that is marked as in error, and - * we can actually read and validate the watermarks we send a - * message to clear the error to the master node. - */ -static int -sp_validate(mp_unit_t *un) -{ - uint_t ext; - struct buf *buf; - sp_ext_length_t len; - mp_watermark_t *wm; - set_t setno; - int reset_error = 0; - - setno = MD_UN2SET(un); - - /* sanity check unit structure components ?? */ - if (un->un_status != MD_SP_OK) { - if (un->un_status != MD_SP_ERR) { - cmn_err(CE_WARN, "md: %s: open failed, soft partition " - "status is %u.", - md_shortname(MD_SID(un)), - un->un_status); - return (-1); - } else { - cmn_err(CE_WARN, "md: %s: open of soft partition " - "in Errored state.", - md_shortname(MD_SID(un))); - reset_error = 1; - } - } - - if (un->un_numexts == 0) { - cmn_err(CE_WARN, "md: %s: open failed, soft partition does " - "not have any extents.", md_shortname(MD_SID(un))); - return (-1); - } - - len = 0LL; - for (ext = 0; ext < un->un_numexts; ext++) { - - /* tally extent lengths to check total size */ - len += un->un_ext[ext].un_len; - - /* allocate buffer for watermark */ - buf = getrbuf(KM_SLEEP); - - /* read watermark */ - buf->b_flags = B_READ; - buf->b_edev = md_dev64_to_dev(un->un_dev); - buf->b_iodone = NULL; - buf->b_proc = NULL; - buf->b_bcount = sizeof (mp_watermark_t); - buf->b_lblkno = un->un_ext[ext].un_poff - 1; - buf->b_bufsize = sizeof (mp_watermark_t); - buf->b_un.b_addr = kmem_alloc(sizeof (mp_watermark_t), - KM_SLEEP); - - /* - * make the call non-blocking so that it is not affected - * by a set take. - */ - md_call_strategy(buf, MD_STR_MAPPED|MD_NOBLOCK, NULL); - (void) biowait(buf); - - if (buf->b_flags & B_ERROR) { - cmn_err(CE_WARN, "md: %s: open failed, could not " - "read watermark at block %llu for extent %u, " - "error %d.", md_shortname(MD_SID(un)), - buf->b_lblkno, ext, buf->b_error); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - - /* - * If we're a multi-owner diskset we send a message - * indicating that this soft-part has an invalid - * extent to the master node. This ensures a consistent - * view of the soft-part across the cluster. - */ - if (MD_MNSET_SETNO(setno)) { - sp_send_stat_err(un); - } - return (-1); - } - - wm = (mp_watermark_t *)buf->b_un.b_addr; - - /* make sure the checksum is correct first */ - if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum, - (uint_t)sizeof (mp_watermark_t), (uchar_t *)NULL)) { - cmn_err(CE_WARN, "md: %s: open failed, watermark " - "at block %llu for extent %u does not have a " - "valid checksum 0x%08x.", md_shortname(MD_SID(un)), - buf->b_lblkno, ext, wm->wm_checksum); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - return (-1); - } - - if (wm->wm_magic != MD_SP_MAGIC) { - cmn_err(CE_WARN, "md: %s: open failed, watermark " - "at block %llu for extent %u does not have a " - "valid watermark magic number, expected 0x%x, " - "found 0x%x.", md_shortname(MD_SID(un)), - buf->b_lblkno, ext, MD_SP_MAGIC, wm->wm_magic); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - return (-1); - } - - /* make sure sequence number matches the current extent */ - if (wm->wm_seq != ext) { - cmn_err(CE_WARN, "md: %s: open failed, watermark " - "at block %llu for extent %u has invalid " - "sequence number %u.", md_shortname(MD_SID(un)), - buf->b_lblkno, ext, wm->wm_seq); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - return (-1); - } - - /* make sure watermark length matches unit structure */ - if (wm->wm_length != un->un_ext[ext].un_len) { - cmn_err(CE_WARN, "md: %s: open failed, watermark " - "at block %llu for extent %u has inconsistent " - "length, expected %llu, found %llu.", - md_shortname(MD_SID(un)), buf->b_lblkno, - ext, un->un_ext[ext].un_len, - (u_longlong_t)wm->wm_length); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - return (-1); - } - - /* - * make sure the type is a valid soft partition and not - * a free extent or the end. - */ - if (wm->wm_type != EXTTYP_ALLOC) { - cmn_err(CE_WARN, "md: %s: open failed, watermark " - "at block %llu for extent %u is not marked " - "as in-use, type = %u.", md_shortname(MD_SID(un)), - buf->b_lblkno, ext, wm->wm_type); - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - return (-1); - } - /* free up buffer */ - kmem_free(buf->b_un.b_addr, sizeof (mp_watermark_t)); - freerbuf(buf); - } - - if (len != un->un_length) { - cmn_err(CE_WARN, "md: %s: open failed, computed length " - "%llu != expected length %llu.", md_shortname(MD_SID(un)), - len, un->un_length); - return (-1); - } - - /* - * If we're a multi-owner set _and_ reset_error is set, we should clear - * the error condition on all nodes in the set. Use SP_SETSTAT2 with - * MD_SP_OK. - */ - if (MD_MNSET_SETNO(setno) && reset_error) { - sp_send_stat_ok(un); - } - return (0); -} - -/* - * FUNCTION: sp_done() - * INPUT: child_buf - buffer attached to child save structure. - * this is the buffer on which I/O has just - * completed. - * OUTPUT: none. - * RETURNS: 0 - success. - * 1 - error. - * PURPOSE: called on I/O completion. - */ -static int -sp_done(struct buf *child_buf) -{ - struct buf *parent_buf; - mdi_unit_t *ui; - md_spps_t *ps; - md_spcs_t *cs; - - /* find the child save structure to which this buffer belongs */ - cs = (md_spcs_t *)((caddr_t)child_buf - - (sizeof (md_spcs_t) - sizeof (buf_t))); - /* now get the parent save structure */ - ps = cs->cs_ps; - parent_buf = ps->ps_bp; - - mutex_enter(&ps->ps_mx); - /* pass any errors back up to the parent */ - if (child_buf->b_flags & B_ERROR) { - ps->ps_flags |= MD_SPPS_ERROR; - parent_buf->b_error = child_buf->b_error; - } - /* mapout, if needed */ - if (child_buf->b_flags & B_REMAPPED) - bp_mapout(child_buf); - - ps->ps_frags--; - if (ps->ps_frags != 0) { - /* - * if this parent has more children, we just free the - * child and return. - */ - kmem_cache_free(sp_child_cache, cs); - mutex_exit(&ps->ps_mx); - return (1); - } - /* there are no more children */ - kmem_cache_free(sp_child_cache, cs); - if (ps->ps_flags & MD_SPPS_ERROR) { - sp_error(ps); - return (1); - } - ui = ps->ps_ui; - if (!(ps->ps_flags & MD_SPPS_DONTFREE)) { - mutex_exit(&ps->ps_mx); - } else { - /* - * this should only ever happen if we are panicking, - * since DONTFREE is only set on the parent if panicstr - * is non-NULL. - */ - ASSERT(panicstr); - } - SPPS_FREE(sp_parent_cache, ps); - md_kstat_done(ui, parent_buf, 0); - md_unit_readerexit(ui); - md_biodone(parent_buf); - return (0); -} - -/* - * FUNCTION: md_sp_strategy() - * INPUT: parent_buf - parent buffer - * flag - flags - * private - private data - * OUTPUT: none. - * RETURNS: void. - * PURPOSE: Soft partitioning I/O strategy. Performs the main work - * needed to do I/O to a soft partition. The basic - * algorithm is as follows: - * - Allocate a child save structure to keep track - * of the I/O we are going to pass down. - * - Map the I/O to the correct extent in the soft - * partition (see sp_mapbuf()). - * - bioclone() the buffer and pass it down the - * stack using md_call_strategy. - * - If the I/O needs to split across extents, - * repeat the above steps until all fragments - * are finished. - */ -static void -md_sp_strategy(buf_t *parent_buf, int flag, void *private) -{ - md_spps_t *ps; - md_spcs_t *cs; - int more; - mp_unit_t *un; - mdi_unit_t *ui; - size_t current_count; - off_t current_offset; - sp_ext_offset_t current_blkno; - buf_t *child_buf; - set_t setno = MD_MIN2SET(getminor(parent_buf->b_edev)); - int strat_flag = flag; - - /* - * When doing IO to a multi owner meta device, check if set is halted. - * We do this check without the needed lock held, for performance - * reasons. - * If an IO just slips through while the set is locked via an - * MD_MN_SUSPEND_SET, we don't care about it. - * Only check for suspension if we are a top-level i/o request - * (MD_STR_NOTTOP is cleared in 'flag'); - */ - if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == - (MD_SET_HALTED | MD_SET_MNSET)) { - if ((flag & MD_STR_NOTTOP) == 0) { - mutex_enter(&md_mx); - /* Here we loop until the set is no longer halted */ - while (md_set[setno].s_status & MD_SET_HALTED) { - cv_wait(&md_cv, &md_mx); - } - mutex_exit(&md_mx); - } - } - - ui = MDI_UNIT(getminor(parent_buf->b_edev)); - - md_kstat_waitq_enter(ui); - - un = (mp_unit_t *)md_unit_readerlock(ui); - - if ((flag & MD_NOBLOCK) == 0) { - if (md_inc_iocount(setno) != 0) { - parent_buf->b_flags |= B_ERROR; - parent_buf->b_error = ENXIO; - parent_buf->b_resid = parent_buf->b_bcount; - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - biodone(parent_buf); - return; - } - } else { - md_inc_iocount_noblock(setno); - } - - if (!(flag & MD_STR_NOTTOP)) { - if (md_checkbuf(ui, (md_unit_t *)un, parent_buf) != 0) { - md_kstat_waitq_exit(ui); - return; - } - } - - ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS); - sp_parent_init(ps); - - /* - * Save essential information from the original buffhdr - * in the parent. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = parent_buf; - ps->ps_addr = parent_buf->b_un.b_addr; - - current_count = parent_buf->b_bcount; - current_blkno = (sp_ext_offset_t)parent_buf->b_blkno; - current_offset = 0; - - /* - * if we are at the top and we are panicking, - * we don't free in order to save state. - */ - if (!(flag & MD_STR_NOTTOP) && (panicstr != NULL)) - ps->ps_flags |= MD_SPPS_DONTFREE; - - md_kstat_waitq_to_runq(ui); - - ps->ps_frags++; - - /* - * Mark this i/o as MD_STR_ABR if we've had ABR enabled on this - * metadevice. - */ - if (ui->ui_tstate & MD_ABR_CAP) - strat_flag |= MD_STR_ABR; - - /* - * this loop does the main work of an I/O. we allocate a - * a child save for each buf, do the logical to physical - * mapping, decide if we need to frag the I/O, clone the - * new I/O to pass down the stack. repeat until we've - * taken care of the entire buf that was passed to us. - */ - do { - cs = kmem_cache_alloc(sp_child_cache, MD_ALLOCFLAGS); - sp_child_init(cs); - child_buf = &cs->cs_buf; - cs->cs_ps = ps; - - more = sp_mapbuf(un, current_blkno, current_count, child_buf); - if (more == -1) { - parent_buf->b_flags |= B_ERROR; - parent_buf->b_error = EIO; - md_kstat_done(ui, parent_buf, 0); - md_unit_readerexit(ui); - md_biodone(parent_buf); - kmem_cache_free(sp_parent_cache, ps); - return; - } - - child_buf = md_bioclone(parent_buf, current_offset, - child_buf->b_bcount, child_buf->b_edev, - child_buf->b_blkno, sp_done, child_buf, - KM_NOSLEEP); - /* calculate new offset, counts, etc... */ - current_offset += child_buf->b_bcount; - current_count -= child_buf->b_bcount; - current_blkno += (sp_ext_offset_t)(btodb(child_buf->b_bcount)); - - if (more) { - mutex_enter(&ps->ps_mx); - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - } - - md_call_strategy(child_buf, strat_flag, private); - } while (more); - - if (!(flag & MD_STR_NOTTOP) && (panicstr != NULL)) { - while (!(ps->ps_flags & MD_SPPS_DONE)) { - md_daemon(1, &md_done_daemon); - } - kmem_cache_free(sp_parent_cache, ps); - } -} - -/* - * FUNCTION: sp_directed_read() - * INPUT: mnum - minor number - * vdr - vol_directed_rd_t from user - * mode - access mode for copying data out. - * OUTPUT: none. - * RETURNS: 0 - success - * Exxxxx - failure error-code - * PURPOSE: Construct the necessary sub-device i/o requests to perform the - * directed read as requested by the user. This is essentially the - * same as md_sp_strategy() with the exception being that the - * underlying 'md_call_strategy' is replaced with an ioctl call. - */ -int -sp_directed_read(minor_t mnum, vol_directed_rd_t *vdr, int mode) -{ - md_spps_t *ps; - md_spcs_t *cs; - int more; - mp_unit_t *un; - mdi_unit_t *ui; - size_t current_count; - off_t current_offset; - sp_ext_offset_t current_blkno; - buf_t *child_buf, *parent_buf; - void *kbuffer; - vol_directed_rd_t cvdr; - caddr_t userbuf; - offset_t useroff; - int ret = 0; - - ui = MDI_UNIT(mnum); - - md_kstat_waitq_enter(ui); - - bzero(&cvdr, sizeof (cvdr)); - - un = (mp_unit_t *)md_unit_readerlock(ui); - - /* - * Construct a parent_buf header which reflects the user-supplied - * request. - */ - - kbuffer = kmem_alloc(vdr->vdr_nbytes, KM_NOSLEEP); - if (kbuffer == NULL) { - vdr->vdr_flags |= DKV_DMR_ERROR; - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - return (ENOMEM); - } - - parent_buf = getrbuf(KM_NOSLEEP); - if (parent_buf == NULL) { - vdr->vdr_flags |= DKV_DMR_ERROR; - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - kmem_free(kbuffer, vdr->vdr_nbytes); - return (ENOMEM); - } - parent_buf->b_un.b_addr = kbuffer; - parent_buf->b_flags = B_READ; - parent_buf->b_bcount = vdr->vdr_nbytes; - parent_buf->b_lblkno = lbtodb(vdr->vdr_offset); - parent_buf->b_edev = un->un_dev; - - - ps = kmem_cache_alloc(sp_parent_cache, MD_ALLOCFLAGS); - sp_parent_init(ps); - - /* - * Save essential information from the original buffhdr - * in the parent. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = parent_buf; - ps->ps_addr = parent_buf->b_un.b_addr; - - current_count = parent_buf->b_bcount; - current_blkno = (sp_ext_offset_t)parent_buf->b_lblkno; - current_offset = 0; - - md_kstat_waitq_to_runq(ui); - - ps->ps_frags++; - vdr->vdr_bytesread = 0; - - /* - * this loop does the main work of an I/O. we allocate a - * a child save for each buf, do the logical to physical - * mapping, decide if we need to frag the I/O, clone the - * new I/O to pass down the stack. repeat until we've - * taken care of the entire buf that was passed to us. - */ - do { - cs = kmem_cache_alloc(sp_child_cache, MD_ALLOCFLAGS); - sp_child_init(cs); - child_buf = &cs->cs_buf; - cs->cs_ps = ps; - - more = sp_mapbuf(un, current_blkno, current_count, child_buf); - if (more == -1) { - ret = EIO; - vdr->vdr_flags |= DKV_DMR_SHORT; - kmem_cache_free(sp_child_cache, cs); - goto err_out; - } - - cvdr.vdr_flags = vdr->vdr_flags; - cvdr.vdr_side = vdr->vdr_side; - cvdr.vdr_nbytes = child_buf->b_bcount; - cvdr.vdr_offset = ldbtob(child_buf->b_lblkno); - /* Work out where we are in the allocated buffer */ - useroff = (offset_t)(uintptr_t)kbuffer; - useroff = useroff + (offset_t)current_offset; - cvdr.vdr_data = (void *)(uintptr_t)useroff; - child_buf = md_bioclone(parent_buf, current_offset, - child_buf->b_bcount, child_buf->b_edev, - child_buf->b_blkno, NULL, - child_buf, KM_NOSLEEP); - /* calculate new offset, counts, etc... */ - current_offset += child_buf->b_bcount; - current_count -= child_buf->b_bcount; - current_blkno += (sp_ext_offset_t)(btodb(child_buf->b_bcount)); - - if (more) { - mutex_enter(&ps->ps_mx); - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - } - - ret = md_call_ioctl(child_buf->b_edev, DKIOCDMR, &cvdr, - (mode | FKIOCTL), NULL); - - /* - * Free the child structure as we've finished with it. - * Normally this would be done by sp_done() but we're just - * using md_bioclone() to segment the transfer and we never - * issue a strategy request so the iodone will not be called. - */ - kmem_cache_free(sp_child_cache, cs); - if (ret == 0) { - /* copyout the returned data to vdr_data + offset */ - userbuf = (caddr_t)kbuffer; - userbuf += (caddr_t)(cvdr.vdr_data) - (caddr_t)kbuffer; - if (ddi_copyout(userbuf, vdr->vdr_data, - cvdr.vdr_bytesread, mode)) { - ret = EFAULT; - goto err_out; - } - vdr->vdr_bytesread += cvdr.vdr_bytesread; - } else { - goto err_out; - } - } while (more); - - /* - * Update the user-supplied vol_directed_rd_t structure with the - * contents of the last issued child request. - */ - vdr->vdr_flags = cvdr.vdr_flags; - vdr->vdr_side = cvdr.vdr_side; - bcopy(cvdr.vdr_side_name, vdr->vdr_side_name, VOL_SIDENAME); - -err_out: - if (ret != 0) { - vdr->vdr_flags |= DKV_DMR_ERROR; - } - if (vdr->vdr_bytesread != vdr->vdr_nbytes) { - vdr->vdr_flags |= DKV_DMR_SHORT; - } - kmem_cache_free(sp_parent_cache, ps); - kmem_free(kbuffer, vdr->vdr_nbytes); - freerbuf(parent_buf); - md_unit_readerexit(ui); - return (ret); -} - -/* - * FUNCTION: sp_snarf() - * INPUT: cmd - snarf cmd. - * setno - set number. - * OUTPUT: none. - * RETURNS: 1 - soft partitions were snarfed. - * 0 - no soft partitions were snarfed. - * PURPOSE: Snarf soft partition metadb records into their in-core - * structures. This routine is called at "snarf time" when - * md loads and gets all metadevices records into memory. - * The basic algorithm is simply to walk the soft partition - * records in the metadb and call the soft partitioning - * build_incore routine to set up the in-core structures. - */ -static int -sp_snarf(md_snarfcmd_t cmd, set_t setno) -{ - mp_unit_t *un; - mddb_recid_t recid; - int gotsomething; - int all_sp_gotten; - mddb_type_t rec_type; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - mp_unit_t *big_un; - mp_unit32_od_t *small_un; - size_t newreqsize; - - - if (cmd == MD_SNARF_CLEANUP) - return (0); - - all_sp_gotten = 1; - gotsomething = 0; - - /* get the record type */ - rec_type = (mddb_type_t)md_getshared_key(setno, - sp_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - /* - * walk soft partition records in the metadb and call - * sp_build_incore to build in-core structures. - */ - while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0) { - /* if we've already gotten this record, go to the next one */ - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_SOFTPART; - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - /* - * This means, we have an old and small record. - * And this record hasn't already been converted - * :-o before we create an incore metadevice - * from this we have to convert it to a big - * record. - */ - small_un = - (mp_unit32_od_t *)mddb_getrecaddr(recid); - newreqsize = sizeof (mp_unit_t) + - ((small_un->un_numexts - 1) * - sizeof (struct mp_ext)); - big_un = (mp_unit_t *)kmem_zalloc(newreqsize, - KM_SLEEP); - softpart_convert((caddr_t)small_un, - (caddr_t)big_un, SMALL_2_BIG); - kmem_free(small_un, dep->de_reqsize); - dep->de_rb_userdata = big_un; - dep->de_reqsize = newreqsize; - rbp->rb_private |= MD_PRV_CONVD; - un = big_un; - } else { - /* Record has already been converted */ - un = (mp_unit_t *)mddb_getrecaddr(recid); - } - un->c.un_revision &= ~MD_64BIT_META_DEV; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - /* Large device */ - un = (mp_unit_t *)mddb_getrecaddr(recid); - un->c.un_revision |= MD_64BIT_META_DEV; - un->c.un_flag |= MD_EFILABEL; - break; - } - MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); - - /* - * Create minor node for snarfed entry. - */ - (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); - - if (MD_UNIT(MD_SID(un)) != NULL) { - /* unit is already in-core */ - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - all_sp_gotten = 0; - if (sp_build_incore((void *)un, 1) == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - md_create_unit_incore(MD_SID(un), &sp_md_ops, 0); - gotsomething = 1; - } - } - - if (!all_sp_gotten) - return (gotsomething); - /* double-check records */ - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - return (0); -} - -/* - * FUNCTION: sp_halt() - * INPUT: cmd - halt cmd. - * setno - set number. - * RETURNS: 0 - success. - * 1 - err. - * PURPOSE: Perform driver halt operations. As with stripe, we - * support MD_HALT_CHECK and MD_HALT_DOIT. The first - * does a check to see if halting can be done safely - * (no open soft partitions), the second cleans up and - * shuts down the driver. - */ -static int -sp_halt(md_haltcmd_t cmd, set_t setno) -{ - int i; - mdi_unit_t *ui; - minor_t mnum; - - if (cmd == MD_HALT_CLOSE) - return (0); - - if (cmd == MD_HALT_OPEN) - return (0); - - if (cmd == MD_HALT_UNLOAD) - return (0); - - if (cmd == MD_HALT_CHECK) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != sp_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) - return (1); - } - return (0); - } - - if (cmd != MD_HALT_DOIT) - return (1); - - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != sp_md_ops.md_selfindex) - continue; - reset_sp((mp_unit_t *)MD_UNIT(mnum), mnum, 0); - } - - return (0); -} - -/* - * FUNCTION: sp_open_dev() - * INPUT: un - unit structure. - * oflags - open flags. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - err. - * PURPOSE: open underlying device via md_layered_open. - */ -static int -sp_open_dev(mp_unit_t *un, int oflags) -{ - minor_t mnum = MD_SID(un); - int err; - md_dev64_t tmpdev; - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - - tmpdev = un->un_dev; - /* - * Do the open by device id if underlying is regular - */ - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, un->un_key) == 1) { - tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_key); - } - err = md_layered_open(mnum, &tmpdev, oflags); - un->un_dev = tmpdev; - - if (err) - return (ENXIO); - - return (0); -} - -/* - * FUNCTION: sp_open() - * INPUT: dev - device to open. - * flag - pass-through flag. - * otyp - pass-through open type. - * cred_p - credentials. - * md_oflags - open flags. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - err. - * PURPOSE: open a soft partition. - */ -/* ARGSUSED */ -static int -sp_open( - dev_t *dev, - int flag, - int otyp, - cred_t *cred_p, - int md_oflags -) -{ - minor_t mnum = getminor(*dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - mp_unit_t *un; - int err = 0; - set_t setno; - - /* - * When doing an open of a multi owner metadevice, check to see if this - * node is a starting node and if a reconfig cycle is underway. - * If so, the system isn't sufficiently set up enough to handle the - * open (which involves I/O during sp_validate), so fail with ENXIO. - */ - setno = MD_MIN2SET(mnum); - if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == - (MD_SET_MNSET | MD_SET_MN_START_RC)) { - return (ENXIO); - } - - /* grab necessary locks */ - un = (mp_unit_t *)md_unit_openclose_enter(ui); - setno = MD_UN2SET(un); - - /* open underlying device, if necessary */ - if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) { - if ((err = sp_open_dev(un, md_oflags)) != 0) - goto out; - - if (MD_MNSET_SETNO(setno)) { - /* For probe, don't incur the overhead of validate */ - if (!(md_oflags & MD_OFLG_PROBEDEV)) { - /* - * Don't call sp_validate while - * unit_openclose lock is held. So, actually - * open the device, drop openclose lock, - * call sp_validate, reacquire openclose lock, - * and close the device. If sp_validate - * succeeds, then device will be re-opened. - */ - if ((err = md_unit_incopen(mnum, flag, - otyp)) != 0) - goto out; - - mutex_enter(&ui->ui_mx); - ui->ui_lock |= MD_UL_OPENINPROGRESS; - mutex_exit(&ui->ui_mx); - md_unit_openclose_exit(ui); - if (otyp != OTYP_LYR) - rw_exit(&md_unit_array_rw.lock); - - err = sp_validate(un); - - if (otyp != OTYP_LYR) - rw_enter(&md_unit_array_rw.lock, - RW_READER); - (void) md_unit_openclose_enter(ui); - (void) md_unit_decopen(mnum, otyp); - mutex_enter(&ui->ui_mx); - ui->ui_lock &= ~MD_UL_OPENINPROGRESS; - cv_broadcast(&ui->ui_cv); - mutex_exit(&ui->ui_mx); - /* - * Should be in the same state as before - * the sp_validate. - */ - if (err != 0) { - /* close the device opened above */ - md_layered_close(un->un_dev, md_oflags); - err = EIO; - goto out; - } - } - /* - * As we're a multi-owner metadevice we need to ensure - * that all nodes have the same idea of the status. - * sp_validate() will mark the device as errored (if - * it cannot read the watermark) or ok (if it was - * previously errored but the watermark is now valid). - * This code-path is only entered on the non-probe open - * so we will maintain the errored state during a probe - * call. This means the sys-admin must metarecover -m - * to reset the soft-partition error. - */ - } else { - /* For probe, don't incur the overhead of validate */ - if (!(md_oflags & MD_OFLG_PROBEDEV) && - (err = sp_validate(un)) != 0) { - /* close the device opened above */ - md_layered_close(un->un_dev, md_oflags); - err = EIO; - goto out; - } else { - /* - * we succeeded in validating the on disk - * format versus the in core, so reset the - * status if it's in error - */ - if (un->un_status == MD_SP_ERR) { - un->un_status = MD_SP_OK; - } - } - } - } - - /* count open */ - if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) - goto out; - -out: - md_unit_openclose_exit(ui); - return (err); -} - -/* - * FUNCTION: sp_close() - * INPUT: dev - device to close. - * flag - pass-through flag. - * otyp - pass-through type. - * cred_p - credentials. - * md_cflags - close flags. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - err. - * PURPOSE: close a soft paritition. - */ -/* ARGSUSED */ -static int -sp_close( - dev_t dev, - int flag, - int otyp, - cred_t *cred_p, - int md_cflags -) -{ - minor_t mnum = getminor(dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - mp_unit_t *un; - int err = 0; - - /* grab necessary locks */ - un = (mp_unit_t *)md_unit_openclose_enter(ui); - - /* count closed */ - if ((err = md_unit_decopen(mnum, otyp)) != 0) - goto out; - - /* close devices, if necessary */ - if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { - md_layered_close(un->un_dev, md_cflags); - } - - /* - * If a MN set and transient capabilities (eg ABR/DMR) are set, - * clear these capabilities if this is the last close in - * the cluster - */ - if (MD_MNSET_SETNO(MD_UN2SET(un)) && - (ui->ui_tstate & MD_ABR_CAP)) { - md_unit_openclose_exit(ui); - mdmn_clear_all_capabilities(mnum); - return (0); - } - /* unlock, return success */ -out: - md_unit_openclose_exit(ui); - return (err); -} - - -/* used in sp_dump routine */ -static struct buf dumpbuf; - -/* - * FUNCTION: sp_dump() - * INPUT: dev - device to dump to. - * addr - address to dump. - * blkno - blkno on device. - * nblk - number of blocks to dump. - * OUTPUT: none. - * RETURNS: result from bdev_dump. - * PURPOSE: This routine dumps memory to the disk. It assumes that - * the memory has already been mapped into mainbus space. - * It is called at disk interrupt priority when the system - * is in trouble. - * NOTE: this function is defined using 32-bit arguments, - * but soft partitioning is internally 64-bit. Arguments - * are casted where appropriate. - */ -static int -sp_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) -{ - mp_unit_t *un; - buf_t *bp; - sp_ext_length_t nb; - daddr_t mapblk; - int result; - int more; - int saveresult = 0; - - /* - * Don't need to grab the unit lock. - * Cause nothing else is supposed to be happenning. - * Also dump is not supposed to sleep. - */ - un = (mp_unit_t *)MD_UNIT(getminor(dev)); - - if ((diskaddr_t)blkno >= un->c.un_total_blocks) - return (EINVAL); - - if (((diskaddr_t)blkno + nblk) > un->c.un_total_blocks) - return (EINVAL); - - bp = &dumpbuf; - nb = (sp_ext_length_t)dbtob(nblk); - do { - bzero((caddr_t)bp, sizeof (*bp)); - more = sp_mapbuf(un, (sp_ext_offset_t)blkno, nb, bp); - nblk = (int)(btodb(bp->b_bcount)); - mapblk = bp->b_blkno; - result = bdev_dump(bp->b_edev, addr, mapblk, nblk); - if (result) - saveresult = result; - - nb -= bp->b_bcount; - addr += bp->b_bcount; - blkno += nblk; - } while (more); - - return (saveresult); -} - -static int -sp_imp_set( - set_t setno -) -{ - mddb_recid_t recid; - int gotsomething; - mddb_type_t rec_type; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - mp_unit_t *un64; - mp_unit32_od_t *un32; - md_dev64_t self_devt; - minor_t *self_id; /* minor needs to be updated */ - md_parent_t *parent_id; /* parent needs to be updated */ - mddb_recid_t *record_id; /* record id needs to be updated */ - - gotsomething = 0; - - rec_type = (mddb_type_t)md_getshared_key(setno, - sp_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, rec_type, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * Small device - */ - un32 = (mp_unit32_od_t *)mddb_getrecaddr(recid); - self_id = &(un32->c.un_self_id); - parent_id = &(un32->c.un_parent); - record_id = &(un32->c.un_record_id); - - if (!md_update_minor(setno, mddb_getsidenum - (setno), un32->un_key)) - goto out; - break; - - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - un64 = (mp_unit_t *)mddb_getrecaddr(recid); - self_id = &(un64->c.un_self_id); - parent_id = &(un64->c.un_parent); - record_id = &(un64->c.un_record_id); - - if (!md_update_minor(setno, mddb_getsidenum - (setno), un64->un_key)) - goto out; - break; - } - - /* - * If this is a top level and a friendly name metadevice, - * update its minor in the namespace. - */ - if ((*parent_id == MD_NO_PARENT) && - ((rbp->rb_revision == MDDB_REV_RBFN) || - (rbp->rb_revision == MDDB_REV_RB64FN))) { - - self_devt = md_makedevice(md_major, *self_id); - if (!md_update_top_device_minor(setno, - mddb_getsidenum(setno), self_devt)) - goto out; - } - - /* - * Update unit with the imported setno - * - */ - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); - if (*parent_id != MD_NO_PARENT) - *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); - *record_id = MAKERECID(setno, DBID(*record_id)); - - gotsomething = 1; - } - -out: - return (gotsomething); -} - -static md_named_services_t sp_named_services[] = { - {NULL, 0} -}; - -md_ops_t sp_md_ops = { - sp_open, /* open */ - sp_close, /* close */ - md_sp_strategy, /* strategy */ - NULL, /* print */ - sp_dump, /* dump */ - NULL, /* read */ - NULL, /* write */ - md_sp_ioctl, /* ioctl, */ - sp_snarf, /* snarf */ - sp_halt, /* halt */ - NULL, /* aread */ - NULL, /* awrite */ - sp_imp_set, /* import set */ - sp_named_services -}; - -static void -init_init() -{ - sp_parent_cache = kmem_cache_create("md_softpart_parent", - sizeof (md_spps_t), 0, sp_parent_constructor, - sp_parent_destructor, sp_run_queue, NULL, NULL, 0); - sp_child_cache = kmem_cache_create("md_softpart_child", - sizeof (md_spcs_t) - sizeof (buf_t) + biosize(), 0, - sp_child_constructor, sp_child_destructor, sp_run_queue, - NULL, NULL, 0); -} - -static void -fini_uninit() -{ - kmem_cache_destroy(sp_parent_cache); - kmem_cache_destroy(sp_child_cache); - sp_parent_cache = sp_child_cache = NULL; -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("soft partition module", init_init(), fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/softpart/sp_ioctl.c b/usr/src/uts/common/io/lvm/softpart/sp_ioctl.c deleted file mode 100644 index 2e4aae56b740..000000000000 --- a/usr/src/uts/common/io/lvm/softpart/sp_ioctl.c +++ /dev/null @@ -1,1674 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Soft partitioning metadevice driver (md_sp), administrative routines. - * - * This file contains the administrative routines for the soft partitioning - * metadevice driver. All administration is done through the use of ioctl's. - * - * The primary ioctl's supported by soft partitions are as follows: - * - * MD_IOCSET - set up a new soft partition. - * MD_IOCGET - get the unit structure of a soft partition. - * MD_IOCRESET - delete a soft partition. - * MD_IOCGROW - add space to a soft partition. - * MD_IOCGETDEVS - get the device the soft partition is built on. - * MD_IOC_SPSTATUS - set the status (un_status field in the soft - * partition unit structure) for one or more soft - * partitions. - * - * Note that, as with other metadevices, the majority of the work for - * building/growing/deleting soft partitions is performed in userland - * (specifically in libmeta, see meta_sp.c). The driver's main administrative - * function is to maintain the in-core & metadb entries associated with a soft - * partition. - * - * In addition, a few other ioctl's are supported via helper routines in - * the md driver. These are: - * - * DKIOCINFO - get "disk" information. - * DKIOCGEOM - get geometry information. - * DKIOCGVTOC - get vtoc information. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern int md_status; - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern md_ops_t sp_md_ops; -extern md_krwlock_t md_unit_array_rw; -extern major_t md_major; - -/* - * FUNCTION: sp_getun() - * INPUT: mnum - minor number of soft partition to get. - * OUTPUT: mde - return error pointer. - * RETURNS: mp_unit_t * - ptr to unit structure requested - * NULL - error - * PURPOSE: Returns a reference to the soft partition unit structure - * indicated by the passed-in minor number. - */ -static mp_unit_t * -sp_getun(minor_t mnum, md_error_t *mde) -{ - mp_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - /* check set */ - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { - (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); - return (NULL); - } - - if (md_get_setstatus(setno) & MD_SET_STALE) { - (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); - return (NULL); - } - - ui = MDI_UNIT(mnum); - - if (ui == NULL) { - (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); - return (NULL); - } - - un = (mp_unit_t *)MD_UNIT(mnum); - - if (un->c.un_type != MD_METASP) { - (void) mdmderror(mde, MDE_NOT_SP, mnum); - return (NULL); - } - - return (un); -} - - -/* - * FUNCTION: sp_setstatus() - * INPUT: d - data ptr passed in from ioctl. - * mode - pass-through to ddi_copyin. - * lockp - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Set the status of one or more soft partitions atomically. - * this implements the MD_IOC_SPSTATUS ioctl. Soft partitions - * are passed in as an array of minor numbers. The un_status - * field in the unit structure of each soft partition is set to - * the status passed in and all unit structures are recommitted - * to the metadb at once. - */ -static int -sp_setstatus(void *d, int mode, IOLOCK *lockp) -{ - minor_t *minors; - mp_unit_t *un; - mddb_recid_t *recids; - int i, nunits, sz; - int err = 0; - sp_status_t status; - md_error_t *mdep; - - md_sp_statusset_t *msp = (md_sp_statusset_t *)d; - - nunits = msp->num_units; - sz = msp->size; - status = msp->new_status; - mdep = &msp->mde; - - mdclrerror(mdep); - /* allocate minor number and recids arrays */ - minors = kmem_alloc(sz, KM_SLEEP); - recids = kmem_alloc((nunits + 1) * sizeof (mddb_recid_t), KM_SLEEP); - - /* copyin minor number array */ - if (err = ddi_copyin((void *)(uintptr_t)msp->minors, minors, sz, mode)) - goto out; - - /* check to make sure all units are valid first */ - for (i = 0; i < nunits; i++) { - if ((un = sp_getun(minors[i], mdep)) == NULL) { - err = mdmderror(mdep, MDE_INVAL_UNIT, minors[i]); - goto out; - } - } - - /* update state for all units */ - for (i = 0; i < nunits; i++) { - un = sp_getun(minors[i], mdep); - (void) md_ioctl_writerlock(lockp, MDI_UNIT(minors[i])); - un->un_status = status; - recids[i] = un->c.un_record_id; - md_ioctl_writerexit(lockp); - } - - recids[i] = 0; - mddb_commitrecs_wrapper(recids); - -out: - kmem_free(minors, sz); - kmem_free(recids, ((nunits + 1) * sizeof (mddb_recid_t))); - return (err); -} - - -/* - * FUNCTION: sp_update_watermarks() - * INPUT: d - data ptr passed in from ioctl. - * mode - pass-through to ddi_copyin. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: This implements the MD_IOC_SPUPDATEWM ioctl. - * Watermarks are passed in an array. - */ -static int -sp_update_watermarks(void *d, int mode) -{ - minor_t mnum; - set_t setno; - md_error_t *mdep; - mp_unit_t *un; - int err = 0; - size_t wsz; - size_t osz; - mp_watermark_t *watermarks; - sp_ext_offset_t *offsets; - md_dev64_t device; - buf_t *bp; - int i; - md_sp_update_wm_t *mup = (md_sp_update_wm_t *)d; - side_t side; - - mnum = mup->mnum; - setno = MD_MIN2SET(mnum); - side = mddb_getsidenum(setno); - un = MD_UNIT(mnum); - - if (un == NULL) - return (EFAULT); - - mdep = &mup->mde; - - mdclrerror(mdep); - - /* Validate the set */ - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); - - wsz = mup->count * sizeof (mp_watermark_t); - watermarks = kmem_alloc(wsz, KM_SLEEP); - - osz = mup->count * sizeof (sp_ext_offset_t); - offsets = kmem_alloc(osz, KM_SLEEP); - - /* - * Once we're here, we are no longer stateless: we cannot - * return without first freeing the watermarks and offset - * arrays we just allocated. So use the "out" label instead - * of "return." - */ - - /* Retrieve the watermark and offset arrays from user land */ - - if (ddi_copyin((void *)(uintptr_t)mup->wmp, watermarks, wsz, mode)) { - err = EFAULT; - goto out; - } - - if (ddi_copyin((void *)(uintptr_t)mup->osp, offsets, osz, mode)) { - err = EFAULT; - goto out; - } - - /* - * NOTE: For multi-node sets we only commit the watermarks if we are - * the master node. This avoids an ioctl-within-ioctl deadlock if the - * underlying device is a mirror. - */ - if (MD_MNSET_SETNO(setno) && !md_set[setno].s_am_i_master) { - goto out; - } - - device = un->un_dev; - if ((md_getmajor(device) != md_major) && - (md_devid_found(setno, side, un->un_key) == 1)) { - device = md_resolve_bydevid(mnum, device, un->un_key); - } - /* - * Flag the fact that we're coming from an ioctl handler to the - * underlying device so that it can take appropriate action if needed. - * This is necessary for multi-owner mirrors as they may need to - * update the metadevice state as a result of the layered open. - */ - if (md_layered_open(mnum, &device, MD_OFLG_FROMIOCTL)) { - err = mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR, - mnum, device); - goto out; - } - - bp = kmem_alloc(biosize(), KM_SLEEP); - bioinit(bp); - - for (i = 0; i < mup->count; i++) { - - /* - * Even the "constant" fields should be initialized - * here, since bioreset() below will clear them. - */ - bp->b_flags = B_WRITE; - bp->b_bcount = sizeof (mp_watermark_t); - bp->b_bufsize = sizeof (mp_watermark_t); - bp->b_un.b_addr = (caddr_t)&watermarks[i]; - bp->b_lblkno = offsets[i]; - bp->b_edev = md_dev64_to_dev(device); - - /* - * For MN sets only: - * Use a special flag MD_STR_WMUPDATE, for the following case: - * If the watermarks reside on a mirror disk and a switch - * of ownership is triggered by this IO, - * the message that is generated by that request must be - * processed even if the commd subsystem is currently suspended. - * - * For non-MN sets or non-mirror metadevices, - * this flag has no meaning and is not checked. - */ - - md_call_strategy(bp, MD_NOBLOCK | MD_STR_WMUPDATE, NULL); - - if (biowait(bp)) { - err = mdmderror(mdep, - MDE_SP_BADWMWRITE, mnum); - break; - } - - /* Get the buf_t ready for the next iteration */ - bioreset(bp); - } - - biofini(bp); - kmem_free(bp, biosize()); - - md_layered_close(device, MD_OFLG_NULL); - -out: - kmem_free(watermarks, wsz); - kmem_free(offsets, osz); - - return (err); -} - - -/* - * FUNCTION: sp_read_watermark() - * INPUT: d - data ptr passed in from ioctl. - * mode - pass-through to ddi_copyin. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: This implements the MD_IOC_SPREADWM ioctl. - */ -static int -sp_read_watermark(void *d, int mode) -{ - md_error_t *mdep; - mp_watermark_t watermark; - md_dev64_t device; - buf_t *bp; - md_sp_read_wm_t *mrp = (md_sp_read_wm_t *)d; - - mdep = &mrp->mde; - - mdclrerror(mdep); - - device = mrp->rdev; - - /* - * Flag the fact that we are being called from ioctl context so that - * the underlying device can take any necessary extra steps to handle - * this scenario. - */ - if (md_layered_open((minor_t)-1, &device, MD_OFLG_FROMIOCTL)) { - return (mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR, - (minor_t)NODEV, device)); - } - - bp = kmem_alloc(biosize(), KM_SLEEP); - bioinit(bp); - - bp->b_flags = B_READ; - bp->b_bcount = sizeof (mp_watermark_t); - bp->b_bufsize = sizeof (mp_watermark_t); - bp->b_un.b_addr = (caddr_t)&watermark; - bp->b_lblkno = mrp->offset; - bp->b_edev = md_dev64_to_dev(device); - - md_call_strategy(bp, MD_NOBLOCK, NULL); - - if (biowait(bp)) { - /* - * Taking advantage of the knowledge that mdmderror() - * returns 0, so we don't really need to keep track of - * an error code other than in the error struct. - */ - (void) mdmderror(mdep, MDE_SP_BADWMREAD, - getminor(device)); - } - - biofini(bp); - kmem_free(bp, biosize()); - - md_layered_close(device, MD_OFLG_NULL); - - if (ddi_copyout(&watermark, (void *)(uintptr_t)mrp->wmp, - sizeof (mp_watermark_t), mode)) { - return (EFAULT); - } - - return (0); -} - - -/* - * FUNCTION: sp_set() - * INPUT: d - data ptr passed in from ioctl. - * mode - pass-through to ddi_copyin. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Create a soft partition. The unit structure representing - * the soft partiton is passed down from userland. We allocate - * a metadb entry, copyin the unit the structure, handle any - * metadevice parenting issues, then commit the record to the - * metadb. Once the record is in the metadb, we must also - * build the associated in-core structures. This is done via - * sp_build_incore() (see sp.c). - */ -static int -sp_set(void *d, int mode) -{ - minor_t mnum; - mp_unit_t *un; - void *rec_addr; - mddb_recid_t recids[3]; - mddb_type_t rec_type; - int err; - set_t setno; - md_error_t *mdep; - md_unit_t *child_un; - md_set_params_t *msp = (md_set_params_t *)d; - - mnum = msp->mnum; - setno = MD_MIN2SET(mnum); - mdep = &msp->mde; - - mdclrerror(mdep); - - /* validate set */ - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); - - /* get the record type */ - rec_type = (mddb_type_t)md_getshared_key(setno, - sp_md_ops.md_driver.md_drivername); - - /* check if there is already a device with this minor number */ - un = MD_UNIT(mnum); - if (un != NULL) - return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum)); - - /* create the db record for this soft partition */ - - if (msp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum)); -#else - recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0, - MD_CRO_64BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno); -#endif - } else { - recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0, - MD_CRO_32BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno); - } - /* set initial value for possible child record */ - recids[1] = 0; - if (recids[0] < 0) - return (mddbstatus2error(mdep, recids[0], mnum, setno)); - - /* get the address of the soft partition db record */ - rec_addr = (void *) mddb_getrecaddr(recids[0]); - - /* - * at this point we can happily mess with the soft partition - * db record since we haven't committed it to the metadb yet. - * if we crash before we commit, the uncommitted record will be - * automatically purged. - */ - - /* copy in the user's soft partition unit struct */ - if (err = ddi_copyin((void *)(uintptr_t)msp->mdp, - rec_addr, (size_t)msp->size, mode)) { - mddb_deleterec_wrapper(recids[0]); - return (EFAULT); - } - - /* fill in common unit structure fields which aren't set in userland */ - un = (mp_unit_t *)rec_addr; - - /* All 64 bit metadevices only support EFI labels. */ - if (msp->options & MD_CRO_64BIT) { - un->c.un_flag |= MD_EFILABEL; - } - - MD_SID(un) = mnum; - MD_RECID(un) = recids[0]; - MD_PARENT(un) = MD_NO_PARENT; - un->c.un_revision |= MD_FN_META_DEV; - - /* if we are parenting a metadevice, set our child's parent field */ - if (md_getmajor(un->un_dev) == md_major) { - /* it's a metadevice, need to parent it */ - child_un = MD_UNIT(md_getminor(un->un_dev)); - if (child_un == NULL) { - mddb_deleterec_wrapper(recids[0]); - return (mdmderror(mdep, MDE_INVAL_UNIT, - md_getminor(un->un_dev))); - } - md_set_parent(un->un_dev, MD_SID(un)); - - /* set child recid and recids end marker */ - recids[1] = MD_RECID(child_un); - recids[2] = 0; - } - - /* - * build the incore structures. - */ - if (err = sp_build_incore(rec_addr, 0)) { - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(recids[0]); - return (err); - } - - /* - * Update unit availability - */ - md_set[setno].s_un_avail--; - - /* - * commit the record. - * if we had to update a child record, it will get commited - * as well. - */ - mddb_commitrecs_wrapper(recids); - - /* create the mdi_unit struct for this soft partition */ - md_create_unit_incore(mnum, &sp_md_ops, 0); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - return (0); -} - - -/* - * FUNCTION: sp_get() - * INPUT: d - data ptr. - * mode - pass-through to ddi_copyout. - * lock - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Get the soft partition unit structure specified by the - * minor number. the in-core unit structure is obtained - * and copied into the md_i_get structure passed down from - * userland. - */ -static int -sp_get(void *d, int mode, IOLOCK *lock) -{ - minor_t mnum; - mdi_unit_t *ui; - mp_unit_t *un; - md_error_t *mdep; - md_i_get_t *migp = d; - - - mnum = migp->id; - mdep = &migp->mde; - - mdclrerror(mdep); - - /* make sure this is a valid unit structure */ - if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - - /* get the mdi_unit */ - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - } - - /* - * md_ioctl_readerlock returns a reference to the in-core - * unit structure. this lock will be dropped by - * md_ioctl_lock_exit() before the ioctl returns. - */ - un = (mp_unit_t *)md_ioctl_readerlock(lock, ui); - - /* verify the md_i_get structure */ - if (migp->size == 0) { - migp->size = un->c.un_size; - return (0); - } - if (migp->size < un->c.un_size) { - return (EFAULT); - } - - /* copyout unit */ - if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, - un->c.un_size, mode)) - return (EFAULT); - return (0); -} - - -/* - * FUNCTION: sp_reset() - * INPUT: reset_params - soft partitioning reset parameters. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Do the setup work needed to delete a soft partition. - * note that the actual removal of both in-core and metadb - * structures is done in the reset_sp() routine (see sp.c). - * In addition, since multiple soft partitions may exist - * on top of a single metadevice, the soft partition reset - * parameters (md_sp_reset_t) contains information about - * how the soft partition should deparent/reparent the - * underlying metadevice. If the underlying metadevice is - * to be deparented, the new_parent field will be MD_NO_PARENT, - * otherwise it will be contain the minor number of another - * soft partition built on top of the underlying metadevice. - */ -static int -sp_reset(md_sp_reset_t *softp) -{ - minor_t mnum = softp->mnum; - mdi_unit_t *ui; - mp_unit_t *un; - md_unit_t *child_un; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&softp->mde); - - /* get the unit structure */ - if ((un = sp_getun(mnum, &softp->mde)) == NULL) { - return (mdmderror(&softp->mde, MDE_INVAL_UNIT, mnum)); - } - - /* don't delete if we have a parent */ - if (MD_HAS_PARENT(un->c.un_parent)) { - return (mdmderror(&softp->mde, MDE_IN_USE, mnum)); - } - - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - - ui = MDI_UNIT(mnum); - (void) md_unit_openclose_enter(ui); - - /* don't delete if we are currently open */ - if (md_unit_isopen(ui)) { - md_unit_openclose_exit(ui); - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&softp->mde, MDE_IS_OPEN, mnum)); - } - - md_unit_openclose_exit(ui); - - /* - * if we are built on metadevice, we need to deparent - * or reparent that metadevice. - */ - if (md_getmajor(un->un_dev) == md_major) { - child_un = MD_UNIT(md_getminor(un->un_dev)); - md_set_parent(un->un_dev, softp->new_parent); - mddb_commitrec_wrapper(MD_RECID(child_un)); - } - /* remove the soft partition */ - reset_sp(un, mnum, 1); - - /* - * Update unit availability - */ - md_set[setno].s_un_avail++; - - /* - * If MN set, reset s_un_next so all nodes can have - * the same view of the next available slot when - * nodes are -w and -j - */ - if (MD_MNSET_SETNO(setno)) { - md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); - } - - /* release locks and return */ -out: - rw_exit(&md_unit_array_rw.lock); - return (0); -} - - -/* - * FUNCTION: sp_grow() - * INPUT: d - data ptr. - * mode - pass-through to ddi_copyin. - * lockp - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Attach more space to a soft partition. We are passed in - * a new unit structure with the new extents and other updated - * information. The new unit structure essentially replaces - * the old unit for this soft partition. We place the new - * unit into the metadb, delete the old metadb record, and - * then update the in-core unit structure array to point to - * the new unit. - */ -static int -sp_grow(void *d, int mode, IOLOCK *lockp) -{ - minor_t mnum; - mp_unit_t *un, *new_un; - mdi_unit_t *ui; - minor_t *par = NULL; - IOLOCK *plock = NULL; - int i; - mddb_recid_t recid; - mddb_type_t rec_type; - mddb_recid_t old_vtoc = 0; - md_create_rec_option_t options; - int err; - int rval = 0; - set_t setno; - md_error_t *mdep; - int npar; - md_grow_params_t *mgp = (md_grow_params_t *)d; - - mnum = mgp->mnum; - mdep = &mgp->mde; - setno = MD_MIN2SET(mnum); - npar = mgp->npar; - - mdclrerror(mdep); - - /* validate set */ - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); - - /* make sure this soft partition already exists */ - ui = MDI_UNIT(mnum); - if (ui == NULL) - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - - /* handle any parents */ - if (npar >= 1) { - ASSERT((minor_t *)(uintptr_t)mgp->par != NULL); - par = kmem_alloc(npar * sizeof (*par), KM_SLEEP); - plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP); - if (ddi_copyin((void *)(uintptr_t)mgp->par, par, - (npar * sizeof (*par)), mode) != 0) { - kmem_free(par, npar * sizeof (*par)); - kmem_free(plock, npar * sizeof (*plock)); - return (EFAULT); - } - } - - /* - * handle parent locking. grab the unit writer lock, - * then all parent ioctl locks, and then finally our own. - * parents should be sorted to avoid deadlock. - */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - for (i = 0; i < npar; ++i) { - (void) md_ioctl_writerlock(&plock[i], - MDI_UNIT(par[i])); - } - un = (mp_unit_t *)md_ioctl_writerlock(lockp, ui); - - rec_type = (mddb_type_t)md_getshared_key(setno, - sp_md_ops.md_driver.md_drivername); - - /* - * Preserve the friendly name nature of the unit that is growing. - */ - options = MD_CRO_SOFTPART; - if (un->c.un_revision & MD_FN_META_DEV) - options |= MD_CRO_FN; - if (mgp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum); - goto out; -#else - recid = mddb_createrec((size_t)mgp->size, rec_type, 0, - MD_CRO_64BIT | options, setno); -#endif - } else { - recid = mddb_createrec((size_t)mgp->size, rec_type, 0, - MD_CRO_32BIT | options, setno); - } - if (recid < 0) { - rval = mddbstatus2error(mdep, (int)recid, mnum, setno); - goto out; - } - - /* get the address of the new unit */ - new_un = (mp_unit_t *)mddb_getrecaddr(recid); - - /* copy in the user's unit struct */ - err = ddi_copyin((void *)(uintptr_t)mgp->mdp, new_un, - (size_t)mgp->size, mode); - if (err) { - mddb_deleterec_wrapper(recid); - rval = EFAULT; - goto out; - } - if (options & MD_CRO_FN) - new_un->c.un_revision |= MD_FN_META_DEV; - - /* All 64 bit metadevices only support EFI labels. */ - if (mgp->options & MD_CRO_64BIT) { - new_un->c.un_flag |= MD_EFILABEL; - /* - * If the device was previously smaller than a terabyte, - * and had a vtoc record attached to it, we remove the - * vtoc record, because the layout has changed completely. - */ - if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) && - (un->c.un_vtoc_id != 0)) { - old_vtoc = un->c.un_vtoc_id; - new_un->c.un_vtoc_id = - md_vtoc_to_efi_record(old_vtoc, setno); - } - } - - /* commit new unit struct */ - MD_RECID(new_un) = recid; - mddb_commitrec_wrapper(recid); - - /* - * delete old unit struct. - */ - mddb_deleterec_wrapper(MD_RECID(un)); - - /* place new unit in in-core array */ - md_nblocks_set(mnum, new_un->c.un_total_blocks); - MD_UNIT(mnum) = new_un; - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, TAG_METADEVICE, - MD_UN2SET(new_un), MD_SID(new_un)); - - /* - * If old_vtoc has a non zero value, we know: - * - This unit crossed the border from smaller to larger one TB - * - There was a vtoc record for the unit, - * - This vtoc record is no longer needed, because - * a new efi record has been created for this un. - */ - if (old_vtoc != 0) { - mddb_deleterec_wrapper(old_vtoc); - } - - /* release locks, return success */ -out: - for (i = npar - 1; (i >= 0); --i) - md_ioctl_writerexit(&plock[i]); - rw_exit(&md_unit_array_rw.lock); - if (plock != NULL) - kmem_free(plock, npar * sizeof (*plock)); - if (par != NULL) - kmem_free(par, npar * sizeof (*par)); - return (rval); -} - -/* - * FUNCTION: sp_getdevs() - * INPUT: d - data ptr. - * mode - pass-through to ddi_copyout. - * lockp - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Get the device on which the soft partition is built. - * This is simply a matter of copying out the md_dev64_t stored - * in the soft partition unit structure. - */ -static int -sp_getdevs( - void *d, - int mode, - IOLOCK *lockp -) -{ - minor_t mnum; - mdi_unit_t *ui; - mp_unit_t *un; - md_error_t *mdep; - md_dev64_t *devsp; - md_dev64_t unit_dev; - md_getdevs_params_t *mgdp = (md_getdevs_params_t *)d; - - - mnum = mgdp->mnum; - mdep = &(mgdp->mde); - - mdclrerror(mdep); - - /* check set */ - if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - /* check unit */ - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - } - /* get unit */ - un = (mp_unit_t *)md_ioctl_readerlock(lockp, ui); - devsp = (md_dev64_t *)(uintptr_t)mgdp->devs; - - /* only ever 1 device for a soft partition */ - if (mgdp->cnt != 0) { - /* do miniroot->target device translation */ - unit_dev = un->un_dev; - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) - == NODEV64) - return (ENODEV); - } - /* copyout dev information */ - if (ddi_copyout(&unit_dev, devsp, sizeof (*devsp), mode) != 0) - return (EFAULT); - } - mgdp->cnt = 1; - - return (0); -} - -/* - * sp_set_capability: - * ------------------ - * Called to set or clear a capability for a softpart - * called by the MD_MN_SET_CAP ioctl. - */ -static int -sp_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp) -{ - set_t setno; - mdi_unit_t *ui; - mp_unit_t *un; - int err = 0; - - if ((un = sp_getun(p->mnum, &p->mde)) == NULL) - return (EINVAL); - - /* This function is only valid for a multi-node set */ - setno = MD_MIN2SET(p->mnum); - if (!MD_MNSET_SETNO(setno)) { - return (EINVAL); - } - ui = MDI_UNIT(p->mnum); - (void) md_ioctl_readerlock(lockp, ui); - - if (p->sc_set & DKV_ABR_CAP) { - void (*inc_abr_count)(); - - ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */ - /* Increment abr count in underlying metadevice */ - inc_abr_count = (void(*)())md_get_named_service(un->un_dev, - 0, MD_INC_ABR_COUNT, 0); - if (inc_abr_count != NULL) - (void) (*inc_abr_count)(un->un_dev); - } else { - void (*dec_abr_count)(); - - ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */ - /* Decrement abr count in underlying metadevice */ - dec_abr_count = (void(*)())md_get_named_service(un->un_dev, - 0, MD_DEC_ABR_COUNT, 0); - if (dec_abr_count != NULL) - (void) (*dec_abr_count)(un->un_dev); - } - if (p->sc_set & DKV_DMR_CAP) { - ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */ - } else { - ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */ - } - md_ioctl_readerexit(lockp); - return (err); -} - - -/* - * FUNCTION: sp_admin_ioctl(). - * INPUT: cmd - ioctl to be handled. - * data - data ptr. - * mode - pass-through to copyin/copyout routines. - * lockp - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Handle administrative ioctl's. Essentially a large - * switch statement to dispatch the ioctl's to their - * handlers. See comment at beginning of file for specifics - * on which ioctl's are handled. - */ -static int -sp_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - - /* handle ioctl */ - switch (cmd) { - - case MD_IOCSET: - { - /* create new soft partition */ - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_set_params_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_set(d, mode); - break; - } - - case MD_IOCGET: - { - /* get soft partition unit structure */ - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_get(d, mode, lockp); - break; - } - case MD_IOCRESET: - { - /* delete soft partition */ - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_sp_reset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_reset((md_sp_reset_t *)d); - break; - } - - case MD_IOCGROW: - { - /* grow soft partition */ - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_grow_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_grow(d, mode, lockp); - break; - } - - case MD_IOCGET_DEVS: - { - /* get underlying device */ - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_getdevs_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_getdevs(d, mode, lockp); - break; - } - - case MD_IOC_SPSTATUS: - { - /* set the status field of one or more soft partitions */ - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_sp_statusset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_setstatus(d, mode, lockp); - break; - } - - case MD_IOC_SPUPDATEWM: - case MD_MN_IOC_SPUPDATEWM: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_sp_update_wm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_update_watermarks(d, mode); - break; - } - - case MD_IOC_SPREADWM: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_sp_read_wm_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_read_watermark(d, mode); - break; - } - - case MD_MN_SET_CAP: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_mn_setcap_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = sp_set_capability((md_mn_setcap_params_t *)d, lockp); - break; - } - - default: - return (ENOTTY); - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - kmem_free(d, sz); - } - return (err); -} - - -/* - * FUNCTION: md_sp_ioctl() - * INPUT: dev - device we are operating on. - * cmd - ioctl to be handled. - * data - data ptr. - * mode - pass-through to copyin/copyout routines. - * lockp - lock ptr. - * OUTPUT: none. - * RETURNS: 0 - success. - * non-zero - error. - * PURPOSE: Dispatch ioctl's. Administrative ioctl's are handled - * by sp_admin_ioctl. All others (see comment at beginning - * of this file) are handled in-line here. - */ -int -md_sp_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp) -{ - minor_t mnum = getminor(dev); - mp_unit_t *un; - mdi_unit_t *ui; - int err = 0; - - /* handle admin ioctls */ - if (mnum == MD_ADM_MINOR) - return (sp_admin_ioctl(cmd, data, mode, lockp)); - - /* check unit */ - if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((ui = MDI_UNIT(mnum)) == NULL) || - ((un = MD_UNIT(mnum)) == NULL)) - return (ENXIO); - - /* is this a supported ioctl? */ - err = md_check_ioctl_against_unit(cmd, un->c); - if (err != 0) { - return (err); - } - - - /* handle ioctl */ - switch (cmd) { - - case DKIOCINFO: - { - /* "disk" info */ - struct dk_cinfo *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - get_info(p, mnum); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) - err = EFAULT; - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGMEDIAINFO: - { - struct dk_minfo p; - - if (! (mode & FREAD)) - return (EACCES); - - get_minfo(&p, mnum); - if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) - err = EFAULT; - - return (err); - } - - case DKIOCGGEOM: - { - /* geometry information */ - struct dk_geom *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - md_get_geom((md_unit_t *)un, p); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), - mode) != 0) - err = EFAULT; - - kmem_free(p, sizeof (*p)); - return (err); - } - case DKIOCGAPART: - { - struct dk_map dmp; - - err = 0; - md_get_cgapart((md_unit_t *)un, &dmp); - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), - mode) != 0) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct dk_map32 dmp32; - - dmp32.dkl_cylno = dmp.dkl_cylno; - dmp32.dkl_nblk = dmp.dkl_nblk; - - if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), - mode) != 0) - err = EFAULT; - } -#endif /* _SYSCALL32 */ - - return (err); - } - case DKIOCGVTOC: - { - /* vtoc information */ - struct vtoc *vtoc; - - if (! (mode & FREAD)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - md_get_vtoc((md_unit_t *)un, vtoc); - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - vtoctovtoc32((*vtoc), (*vtoc32)); - if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) - err = EFAULT; - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCSVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { - err = EFAULT; - } - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { - err = EFAULT; - } else { - vtoc32tovtoc((*vtoc32), (*vtoc)); - } - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - if (err == 0) - err = md_set_vtoc((md_unit_t *)un, vtoc); - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCGEXTVTOC: - { - /* extended vtoc information */ - struct extvtoc *extvtoc; - - if (! (mode & FREAD)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - md_get_extvtoc((md_unit_t *)un, extvtoc); - - if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) - err = EFAULT; - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCSEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { - err = EFAULT; - } - - if (err == 0) - err = md_set_extvtoc((md_unit_t *)un, extvtoc); - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCGETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocgetefi(mnum, data, mode)); - } - case DKIOCSETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocsetefi(mnum, data, mode)); - } - - case DKIOCPARTITION: - { - return (md_dkiocpartition(mnum, data, mode)); - } - - case DKIOCGETVOLCAP: - { - /* - * Return the supported capabilities for the soft-partition. - * We can only support those caps that are provided by the - * underlying device. - */ - - volcap_t vc; - - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - if (! (mode & FREAD)) - return (EACCES); - - bzero(&vc, sizeof (vc)); - - /* Send ioctl to underlying driver */ - - err = md_call_ioctl(un->un_dev, cmd, &vc, (mode | FKIOCTL), - lockp); - - if (err == 0) - ui->ui_capab = vc.vc_info; - - if (ddi_copyout(&vc, data, sizeof (vc), mode)) - err = EFAULT; - - return (err); - } - - case DKIOCSETVOLCAP: - { - /* - * Enable a supported capability (as returned by DKIOCGETVOLCAP) - * Do not pass the request down as we're the top-level device - * handler for the application. - * If the requested capability is supported (set in ui_capab), - * set the corresponding bit in ui_tstate so that we can pass - * the appropriate flag when performing i/o. - * This request is propagated to all nodes. - */ - volcap_t vc, vc1; - volcapset_t volcap = 0; - void (*check_offline)(); - int offline_status = 0; - - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - if (! (mode & FWRITE)) - return (EACCES); - - if (ddi_copyin(data, &vc, sizeof (vc), mode)) - return (EFAULT); - - /* - * Send DKIOCGETVOLCAP to underlying driver to see if - * capability supported - */ - - vc1.vc_info = 0; - err = md_call_ioctl(un->un_dev, DKIOCGETVOLCAP, &vc1, - (mode | FKIOCTL), lockp); - if (err != 0) - return (err); - - /* Save capabilities */ - ui->ui_capab = vc1.vc_info; - /* - * Error if required capability not supported by underlying - * driver - */ - if ((vc1.vc_info & vc.vc_set) == 0) - return (ENOTSUP); - - - /* - * Check if underlying mirror has an offline submirror, - * fail if there is on offline submirror - */ - check_offline = (void(*)())md_get_named_service(un->un_dev, - 0, MD_CHECK_OFFLINE, 0); - if (check_offline != NULL) - (void) (*check_offline)(un->un_dev, &offline_status); - if (offline_status) - return (EINVAL); - - if (ui->ui_tstate & MD_ABR_CAP) - volcap |= DKV_ABR_CAP; - - /* Only send capability message if there is a change */ - if ((vc.vc_set & (DKV_ABR_CAP)) != volcap) - err = mdmn_send_capability_message(mnum, vc, lockp); - return (err); - } - - case DKIOCDMR: - { - /* - * Only valid for MN sets. We need to pass it down to the - * underlying driver if its a metadevice, after we've modified - * the offsets to pick up the correct lower-level device - * position. - */ - vol_directed_rd_t *vdr; -#ifdef _MULTI_DATAMODEL - vol_directed_rd32_t *vdr32; -#endif /* _MULTI_DATAMODEL */ - - if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) - return (EINVAL); - - if (! (ui->ui_capab & DKV_DMR_CAP)) - return (EINVAL); - - vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP); - if (vdr == NULL) - return (ENOMEM); - - /* - * Underlying device supports directed mirror read, so update - * the user-supplied offset to pick the correct block from the - * partitioned metadevice. - */ -#ifdef _MULTI_DATAMODEL - vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP); - if (vdr32 == NULL) { - kmem_free(vdr, sizeof (vol_directed_rd_t)); - return (ENOMEM); - } - - switch (ddi_model_convert_from(mode & FMODELS)) { - case DDI_MODEL_ILP32: - if (ddi_copyin(data, vdr32, sizeof (*vdr32), mode)) { - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } - vdr->vdr_flags = vdr32->vdr_flags; - vdr->vdr_offset = vdr32->vdr_offset; - vdr->vdr_nbytes = vdr32->vdr_nbytes; - vdr->vdr_data = (void *)(uintptr_t)vdr32->vdr_data; - vdr->vdr_side = vdr32->vdr_side; - break; - - case DDI_MODEL_NONE: - if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { - kmem_free(vdr32, sizeof (*vdr32)); - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } - break; - - default: - kmem_free(vdr32, sizeof (*vdr32)); - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } -#else /* ! _MULTI_DATAMODEL */ - if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { - kmem_free(vdr, sizeof (*vdr)); - return (EFAULT); - } -#endif /* _MULTI_DATA_MODEL */ - - err = sp_directed_read(mnum, vdr, mode); - - -#ifdef _MULTI_DATAMODEL - switch (ddi_model_convert_from(mode & FMODELS)) { - case DDI_MODEL_ILP32: - vdr32->vdr_flags = vdr->vdr_flags; - vdr32->vdr_offset = vdr->vdr_offset; - vdr32->vdr_side = vdr->vdr_side; - vdr32->vdr_bytesread = vdr->vdr_bytesread; - bcopy(vdr->vdr_side_name, vdr32->vdr_side_name, - sizeof (vdr32->vdr_side_name)); - - if (ddi_copyout(vdr32, data, sizeof (*vdr32), mode)) - err = EFAULT; - break; - - case DDI_MODEL_NONE: - if (ddi_copyout(&vdr, data, sizeof (vdr), mode)) - err = EFAULT; - break; - } -#else /* ! _MULTI_DATA_MODEL */ - if (ddi_copyout(&vdr, data, sizeof (vdr), mode)) - err = EFAULT; -#endif /* _MULTI_DATA_MODEL */ - -#ifdef _MULTI_DATAMODEL - kmem_free(vdr32, sizeof (*vdr32)); -#endif /* _MULTI_DATAMODEL */ - kmem_free(vdr, sizeof (*vdr)); - - return (err); - } - - } - - /* Option not handled */ - return (ENOTTY); -} diff --git a/usr/src/uts/common/io/lvm/stripe/stripe.c b/usr/src/uts/common/io/lvm/stripe/stripe.c deleted file mode 100644 index 79c959097496..000000000000 --- a/usr/src/uts/common/io/lvm/stripe/stripe.c +++ /dev/null @@ -1,1796 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -md_ops_t stripe_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &stripe_md_ops; -#endif - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern kmutex_t md_mx; -extern kcondvar_t md_cv; - -extern int md_status; -extern major_t md_major; -extern mdq_anchor_t md_done_daemon; - -static int md_stripe_mcs_buf_off; -static kmem_cache_t *stripe_parent_cache = NULL; -static kmem_cache_t *stripe_child_cache = NULL; - -/*ARGSUSED1*/ -static int -stripe_parent_constructor(void *p, void *d1, int d2) -{ - mutex_init(&((md_sps_t *)p)->ps_mx, - NULL, MUTEX_DEFAULT, NULL); - return (0); -} - -static void -stripe_parent_init(void *ps) -{ - bzero(ps, offsetof(md_sps_t, ps_mx)); -} - -/*ARGSUSED1*/ -static void -stripe_parent_destructor(void *p, void *d) -{ - mutex_destroy(&((md_sps_t *)p)->ps_mx); -} - -/*ARGSUSED1*/ -static int -stripe_child_constructor(void *p, void *d1, int d2) -{ - bioinit(&((md_scs_t *)p)->cs_buf); - return (0); -} - -static void -stripe_child_init(md_scs_t *cs) -{ - cs->cs_mdunit = 0; - cs->cs_ps = NULL; - cs->cs_comp = NULL; - md_bioreset(&cs->cs_buf); -} - -/*ARGSUSED1*/ -static void -stripe_child_destructor(void *p, void *d) -{ - biofini(&((md_scs_t *)p)->cs_buf); -} - -/*ARGSUSED*/ -static void -stripe_run_queue(void *d) -{ - if (!(md_status & MD_GBL_DAEMONS_LIVE)) - md_daemon(1, &md_done_daemon); -} - -static void -stripe_close_all_devs(ms_unit_t *un, int md_cflags) -{ - int row; - int i; - int c; - struct ms_comp *mdcomp; - - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - struct ms_comp *mdc; - mdc = &mdcomp[c++]; - if (md_cflags & MD_OFLG_PROBEDEV) { - - /* - * It is possible that the md_layered_open - * failed because the stripe unit structure - * contained a NODEV. In such a case since - * there is nothing to open, there is nothing - * to close. - */ - if (mdc->un_dev == NODEV64) - continue; - } - if ((md_cflags & MD_OFLG_PROBEDEV) && - (mdc->un_mirror.ms_flags & MDM_S_PROBEOPEN)) { - md_layered_close(mdc->un_dev, - md_cflags); - mdc->un_mirror.ms_flags &= ~MDM_S_PROBEOPEN; - } else if (mdc->un_mirror.ms_flags & MDM_S_ISOPEN) { - md_layered_close(mdc->un_dev, md_cflags); - mdc->un_mirror.ms_flags &= ~MDM_S_ISOPEN; - } - } - } -} - -static int -stripe_open_all_devs(ms_unit_t *un, int md_oflags) -{ - minor_t mnum = MD_SID(un); - int row; - int i; - int c; - struct ms_comp *mdcomp; - int err; - int cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS); - int probe_err_cnt = 0; - int total_comp_cnt = 0; - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - mdkey_t key; - - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - - /* - * For a probe call, if any component of a stripe or a concat - * can be opened, it is considered to be a success. The total number - * of components in a stripe are computed prior to starting a probe. - * This number is then compared against the number of components - * that could be be successfully opened. If none of the components - * in a stripe can be opened, only then an ENXIO is returned for a - * probe type open. - */ - - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - - if (md_oflags & MD_OFLG_PROBEDEV) - total_comp_cnt += mdr->un_ncomp; - - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - struct ms_comp *mdc; - md_dev64_t tmpdev; - - mdc = &mdcomp[c++]; - tmpdev = mdc->un_dev; - /* - * Do the open by device id - * Check if this comp is hotspared and - * if it is then use the key for hotspare. - * MN disksets don't use devids, so we better don't use - * md_devid_found/md_resolve_bydevid there. Rather do, - * what's done in stripe_build_incore() - */ - if (MD_MNSET_SETNO(setno)) { - if (mdc->un_mirror.ms_hs_id != 0) { - (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, - 0, &mdc->un_mirror.ms_hs_id, NULL, - &tmpdev, NULL); - } - } else { - key = mdc->un_mirror.ms_hs_id ? - mdc->un_mirror.ms_hs_key : mdc->un_key; - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, key) == 1) { - tmpdev = md_resolve_bydevid(mnum, - tmpdev, key); - } - } - - /* - * For a submirror, we only want to open those devices - * that are not errored. If the device is errored then - * then there is no reason to open it and leaving it - * closed allows the RCM/DR code to work so that the - * errored device can be replaced. - */ - if ((md_oflags & MD_OFLG_PROBEDEV) || - ! (mdc->un_mirror.ms_state & CS_ERRED)) { - - err = md_layered_open(mnum, &tmpdev, md_oflags); - } else { - err = ENXIO; - } - - /* - * Only set the un_dev if the tmpdev != NODEV64. If - * it is NODEV64 then the md_layered_open() will have - * failed in some manner. - */ - if (tmpdev != NODEV64) - mdc->un_dev = tmpdev; - - if (err) { - if (!cont_on_errors) { - stripe_close_all_devs(un, md_oflags); - return (ENXIO); - } - - if (md_oflags & MD_OFLG_PROBEDEV) - probe_err_cnt++; - } else { - if (md_oflags & MD_OFLG_PROBEDEV) { - mdc->un_mirror.ms_flags |= - MDM_S_PROBEOPEN; - } else - mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; - } - } - } - - /* If every component in a stripe could not be opened fail */ - if ((md_oflags & MD_OFLG_PROBEDEV) && - (probe_err_cnt == total_comp_cnt)) - return (ENXIO); - else - return (0); -} - -int -stripe_build_incore(void *p, int snarfing) -{ - ms_unit_t *un = (ms_unit_t *)p; - struct ms_comp *mdcomp; - minor_t mnum; - int row; - int i; - int c; - int ncomps; - - mnum = MD_SID(un); - - if (MD_UNIT(mnum) != NULL) - return (0); - - MD_STATUS(un) = 0; - - /* - * Reset all the is_open flags, these are probably set - * cause they just came out of the database. - */ - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - - ncomps = 0; - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - ncomps += mdr->un_ncomp; - } - - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - struct ms_comp *mdc; - set_t setno; - md_dev64_t tmpdev; - - mdc = &mdcomp[c++]; - mdc->un_mirror.ms_flags &= - ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED); - - if (!snarfing) - continue; - - setno = MD_MIN2SET(mnum); - - tmpdev = md_getdevnum(setno, mddb_getsidenum(setno), - mdc->un_key, MD_NOTRUST_DEVT); - mdc->un_dev = tmpdev; - /* - * Check for hotspares. If the hotspares haven't been - * snarfed yet, stripe_open_all_devs() will do the - * remapping of the dev's later. - */ - if (mdc->un_mirror.ms_hs_id != 0) { - mdc->un_mirror.ms_orig_dev = mdc->un_dev; - (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, - 0, &mdc->un_mirror.ms_hs_id, NULL, - &tmpdev, NULL); - mdc->un_dev = tmpdev; - } - } - } - - /* place various information in the in-core data structures */ - md_nblocks_set(mnum, un->c.un_total_blocks); - MD_UNIT(mnum) = un; - - return (0); -} - -void -reset_stripe(ms_unit_t *un, minor_t mnum, int removing) -{ - ms_comp_t *mdcomp; - struct ms_row *mdr; - int i, c; - int row; - int nsv; - int isv; - sv_dev_t *sv; - mddb_recid_t *recids; - mddb_recid_t vtoc_id; - int rid = 0; - - md_destroy_unit_incore(mnum, &stripe_md_ops); - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - /* - * Attempt release of its minor node - */ - md_remove_minor_node(mnum); - - if (!removing) - return; - - nsv = 0; - /* Count the number of devices */ - for (row = 0; row < un->un_nrows; row++) { - mdr = &un->un_row[row]; - nsv += mdr->un_ncomp; - } - sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP); - - /* - * allocate recids array. since we may have to commit - * underlying soft partition records, we need an array - * of size: total number of components in stripe + 3 - * (one for the stripe itself, one for the hotspare, one - * for the end marker). - */ - recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP); - - /* - * Save the md_dev64_t's and driver nm indexes. - * Because after the mddb_deleterec() we will - * not be able to access the unit structure. - * - * NOTE: Deleting the names before deleting the - * unit structure would cause problems if - * the machine crashed in between the two. - */ - isv = 0; - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - - for (row = 0; row < un->un_nrows; row++) { - mdr = &un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - struct ms_comp *mdc; - md_dev64_t child_dev; - md_unit_t *child_un; - - mdc = &mdcomp[c++]; - if (mdc->un_mirror.ms_hs_id != 0) { - mdkey_t hs_key; - - hs_key = mdc->un_mirror.ms_hs_key; - - mdc->un_dev = mdc->un_mirror.ms_orig_dev; - mdc->un_start_block = - mdc->un_mirror.ms_orig_blk; - mdc->un_mirror.ms_hs_id = 0; - mdc->un_mirror.ms_hs_key = 0; - mdc->un_mirror.ms_orig_dev = 0; - recids[0] = 0; - recids[1] = 0; /* recids[1] filled in below */ - recids[2] = 0; - (void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id, - 0, 0, &recids[0], &hs_key, NULL, NULL); - mddb_commitrecs_wrapper(recids); - } - - /* - * check if we've got metadevice below us and - * deparent it if we do. - * NOTE: currently soft partitions are the - * the only metadevices stripes can be - * built on top of. - */ - child_dev = mdc->un_dev; - if (md_getmajor(child_dev) == md_major) { - child_un = MD_UNIT(md_getminor(child_dev)); - md_reset_parent(child_dev); - recids[rid++] = MD_RECID(child_un); - } - - sv[isv].setno = MD_MIN2SET(mnum); - sv[isv++].key = mdc->un_key; - } - } - - recids[rid++] = un->c.un_record_id; - recids[rid] = 0; /* filled in below */ - - /* - * Decrement the HSP reference count and - * remove the knowledge of the HSP from the unit struct. - * This is done atomically to remove a window. - */ - if (un->un_hsp_id != -1) { - (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, - &recids[rid++], NULL, NULL, NULL); - un->un_hsp_id = -1; - } - - /* set end marker and commit records */ - recids[rid] = 0; - mddb_commitrecs_wrapper(recids); - - vtoc_id = un->c.un_vtoc_id; - - /* - * Remove self from the namespace - */ - if (un->c.un_revision & MD_FN_META_DEV) { - (void) md_rem_selfname(un->c.un_self_id); - } - - /* Remove the unit structure */ - mddb_deleterec_wrapper(un->c.un_record_id); - - /* Remove the vtoc, if present */ - if (vtoc_id) - mddb_deleterec_wrapper(vtoc_id); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, - MD_MIN2SET(mnum), MD_MIN2UNIT(mnum)); - md_rem_names(sv, nsv); - kmem_free(sv, sizeof (sv_dev_t) * nsv); - kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3)); -} - -static void -stripe_error(md_sps_t *ps) -{ - struct buf *pb = ps->ps_bp; - mdi_unit_t *ui = ps->ps_ui; - md_dev64_t dev = ps->ps_errcomp->un_dev; - md_dev64_t md_dev = md_expldev(pb->b_edev); - char *str; - - if (pb->b_flags & B_READ) { - ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_READERR; - str = "read"; - } else { - ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_WRTERR; - str = "write"; - } - if (!(ps->ps_flags & MD_SPS_DONTFREE)) { - if (MUTEX_HELD(&ps->ps_mx)) { - mutex_exit(&ps->ps_mx); - } - } else { - ASSERT(panicstr); - } - SPS_FREE(stripe_parent_cache, ps); - pb->b_flags |= B_ERROR; - - md_kstat_done(ui, pb, 0); - md_unit_readerexit(ui); - md_biodone(pb); - - cmn_err(CE_WARN, "md: %s: %s error on %s", - md_shortname(md_getminor(md_dev)), str, - md_devname(MD_DEV2SET(md_dev), dev, NULL, 0)); -} - -static int -stripe_done(struct buf *cb) -{ - struct buf *pb; - mdi_unit_t *ui; - md_sps_t *ps; - md_scs_t *cs; - - /*LINTED*/ - cs = (md_scs_t *)((caddr_t)cb - md_stripe_mcs_buf_off); - ps = cs->cs_ps; - pb = ps->ps_bp; - - mutex_enter(&ps->ps_mx); - if (cb->b_flags & B_ERROR) { - ps->ps_flags |= MD_SPS_ERROR; - pb->b_error = cb->b_error; - ps->ps_errcomp = cs->cs_comp; - } - - if (cb->b_flags & B_REMAPPED) - bp_mapout(cb); - - ps->ps_frags--; - if (ps->ps_frags != 0) { - mutex_exit(&ps->ps_mx); - kmem_cache_free(stripe_child_cache, cs); - return (1); - } - kmem_cache_free(stripe_child_cache, cs); - if (ps->ps_flags & MD_SPS_ERROR) { - stripe_error(ps); - return (1); - } - ui = ps->ps_ui; - if (!(ps->ps_flags & MD_SPS_DONTFREE)) { - mutex_exit(&ps->ps_mx); - } else { - ASSERT(panicstr); - } - SPS_FREE(stripe_parent_cache, ps); - md_kstat_done(ui, pb, 0); - md_unit_readerexit(ui); - md_biodone(pb); - return (0); -} - - -/* - * This routine does the mapping from virtual (dev, blkno) of a metapartition - * to the real (dev, blkno) of a real disk partition. - * It goes to the md_conf[] table to find out the correct real partition - * dev and block number for this buffer. - * - * A single buf request can not go across real disk partition boundary. - * When the virtual request specified by (dev, blkno) spans more than one - * real partition, md_mapbuf will return 1. Then the caller should prepare - * another real buf and continue calling md_mapbuf to do the mapping until - * it returns 0. - * - */ - -static int -md_mapbuf( - ms_unit_t *un, - diskaddr_t blkno, - u_longlong_t bcount, - buf_t *bp, /* if bp==NULL, skip bp updates */ - ms_comp_t **mdc) /* if bp==NULL, skip mdc update */ -{ - struct ms_row *mdr; - struct ms_comp *mdcomp; - diskaddr_t stripe_blk; - diskaddr_t fragment, blk_in_row, endblk; - offset_t interlace; - size_t dev_index; - int row_index, more; - extern unsigned md_maxphys; - /* Work var's when bp==NULL */ - u_longlong_t wb_bcount; - diskaddr_t wb_blkno; - md_dev64_t wb_edev; - ms_comp_t *wmdc; - - /* - * Do a real calculation to derive the minor device of the - * Virtual Disk, which in turn will let us derive the - * device/minor of the underlying real device. - */ - - - for (row_index = 0; row_index < un->un_nrows; row_index++) { - mdr = &un->un_row[row_index]; - if (blkno < mdr->un_cum_blocks) - break; - } - ASSERT(row_index != un->un_nrows); - - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - - blk_in_row = blkno - mdr->un_cum_blocks + mdr->un_blocks; - endblk = (diskaddr_t)(blkno + howmany(bcount, DEV_BSIZE)); - if (mdr->un_ncomp == 1) { /* No striping */ - if (endblk > mdr->un_cum_blocks) { - wb_bcount = ldbtob(mdr->un_cum_blocks - blkno); - if ((row_index + 1) == un->un_nrows) - more = 0; - else - more = 1; - } else { - wb_bcount = bcount; - more = 0; - } - wmdc = &mdcomp[mdr->un_icomp]; - wb_blkno = blk_in_row; - } else { /* Have striping */ - interlace = mdr->un_interlace; - fragment = blk_in_row % interlace; - if (bcount > ldbtob(interlace - fragment)) { - more = 1; - wb_bcount = ldbtob(interlace - fragment); - } else { - more = 0; - wb_bcount = bcount; - } - - stripe_blk = blk_in_row / interlace; - dev_index = (size_t)(stripe_blk % mdr->un_ncomp); - wmdc = &mdcomp[mdr->un_icomp + dev_index]; - wb_blkno = (diskaddr_t)(((stripe_blk / mdr->un_ncomp) * - interlace) + fragment); - } - - wb_blkno += wmdc->un_start_block; - wb_edev = wmdc->un_dev; - - /* only break up the I/O if we're not built on another metadevice */ - if ((md_getmajor(wb_edev) != md_major) && (wb_bcount > md_maxphys)) { - wb_bcount = md_maxphys; - more = 1; - } - if (bp != (buf_t *)NULL) { - /* - * wb_bcount is limited by md_maxphys which is 'int' - */ - bp->b_bcount = (size_t)wb_bcount; - bp->b_lblkno = wb_blkno; - bp->b_edev = md_dev64_to_dev(wb_edev); - *mdc = wmdc; - } - return (more); -} - -static void -md_stripe_strategy(buf_t *pb, int flag, void *private) -{ - md_sps_t *ps; - md_scs_t *cs; - int doing_writes; - int more; - ms_unit_t *un; - mdi_unit_t *ui; - size_t current_count; - diskaddr_t current_blkno; - off_t current_offset; - buf_t *cb; /* child buf pointer */ - set_t setno; - - setno = MD_MIN2SET(getminor(pb->b_edev)); - - /* - * When doing IO to a multi owner meta device, check if set is halted. - * We do this check without the needed lock held, for performance - * reasons. - * If an IO just slips through while the set is locked via an - * MD_MN_SUSPEND_SET, we don't care about it. - * Only check for a suspended set if we are a top-level i/o request - * (MD_STR_NOTTOP is cleared in 'flag'). - */ - if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == - (MD_SET_HALTED | MD_SET_MNSET)) { - if ((flag & MD_STR_NOTTOP) == 0) { - mutex_enter(&md_mx); - /* Here we loop until the set is no longer halted */ - while (md_set[setno].s_status & MD_SET_HALTED) { - cv_wait(&md_cv, &md_mx); - } - mutex_exit(&md_mx); - } - } - - ui = MDI_UNIT(getminor(pb->b_edev)); - - md_kstat_waitq_enter(ui); - - un = (ms_unit_t *)md_unit_readerlock(ui); - - if ((flag & MD_NOBLOCK) == 0) { - if (md_inc_iocount(setno) != 0) { - pb->b_flags |= B_ERROR; - pb->b_error = ENXIO; - pb->b_resid = pb->b_bcount; - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - biodone(pb); - return; - } - } else { - md_inc_iocount_noblock(setno); - } - - if (!(flag & MD_STR_NOTTOP)) { - if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { - md_kstat_waitq_exit(ui); - return; - } - } - - ps = kmem_cache_alloc(stripe_parent_cache, MD_ALLOCFLAGS); - stripe_parent_init(ps); - - /* - * Save essential information from the original buffhdr - * in the md_save structure. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = pb; - ps->ps_addr = pb->b_un.b_addr; - - if ((pb->b_flags & B_READ) == 0) - doing_writes = 1; - else - doing_writes = 0; - - - current_count = pb->b_bcount; - current_blkno = pb->b_lblkno; - current_offset = 0; - - if (!(flag & MD_STR_NOTTOP) && panicstr) - ps->ps_flags |= MD_SPS_DONTFREE; - - md_kstat_waitq_to_runq(ui); - - ps->ps_frags++; - do { - cs = kmem_cache_alloc(stripe_child_cache, MD_ALLOCFLAGS); - stripe_child_init(cs); - cb = &cs->cs_buf; - cs->cs_ps = ps; - more = md_mapbuf(un, current_blkno, current_count, cb, - &cs->cs_comp); - - cb = md_bioclone(pb, current_offset, cb->b_bcount, cb->b_edev, - cb->b_lblkno, stripe_done, cb, KM_NOSLEEP); - /* - * Do these calculations now, - * so that we pickup a valid b_bcount from the chld_bp. - */ - current_offset += cb->b_bcount; - current_count -= cb->b_bcount; - current_blkno += (diskaddr_t)(lbtodb(cb->b_bcount)); - - if (more) { - mutex_enter(&ps->ps_mx); - ps->ps_frags++; - mutex_exit(&ps->ps_mx); - } - - if (doing_writes && - cs->cs_comp->un_mirror.ms_flags & MDM_S_NOWRITE) { - (void) stripe_done(cb); - continue; - } - md_call_strategy(cb, flag, private); - } while (more); - - if (!(flag & MD_STR_NOTTOP) && panicstr) { - while (!(ps->ps_flags & MD_SPS_DONE)) { - md_daemon(1, &md_done_daemon); - drv_usecwait(10); - } - kmem_cache_free(stripe_parent_cache, ps); - } -} - -static int -stripe_snarf(md_snarfcmd_t cmd, set_t setno) -{ - ms_unit_t *un; - mddb_recid_t recid; - int gotsomething; - int all_stripes_gotten; - mddb_type_t typ1; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - size_t newreqsize; - ms_unit_t *big_un; - ms_unit32_od_t *small_un; - - - if (cmd == MD_SNARF_CLEANUP) - return (0); - - all_stripes_gotten = 1; - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - stripe_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_STRIPE; - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - /* - * This means, we have an old and small record - * and this record hasn't already been - * converted. Before we create an incore - * metadevice from this we have to convert it to - * a big record. - */ - small_un = - (ms_unit32_od_t *)mddb_getrecaddr(recid); - newreqsize = get_big_stripe_req_size(small_un, - COMPLETE_STRUCTURE); - big_un = (ms_unit_t *)kmem_zalloc(newreqsize, - KM_SLEEP); - stripe_convert((caddr_t)small_un, - (caddr_t)big_un, SMALL_2_BIG); - kmem_free(small_un, dep->de_reqsize); - dep->de_rb_userdata = big_un; - dep->de_reqsize = newreqsize; - un = big_un; - rbp->rb_private |= MD_PRV_CONVD; - } else { - /* Small device had already been converted */ - un = (ms_unit_t *)mddb_getrecaddr(recid); - } - un->c.un_revision &= ~MD_64BIT_META_DEV; - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - /* Big device */ - un = (ms_unit_t *)mddb_getrecaddr(recid); - un->c.un_revision |= MD_64BIT_META_DEV; - un->c.un_flag |= MD_EFILABEL; - break; - } - MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); - - /* Create minor node for snarfed unit. */ - (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); - - if (MD_UNIT(MD_SID(un)) != NULL) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - all_stripes_gotten = 0; - if (stripe_build_incore((void *)un, 1) == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - md_create_unit_incore(MD_SID(un), &stripe_md_ops, 0); - gotsomething = 1; - } - } - - if (!all_stripes_gotten) - return (gotsomething); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - - return (0); -} - -static int -stripe_halt(md_haltcmd_t cmd, set_t setno) -{ - int i; - mdi_unit_t *ui; - minor_t mnum; - - if (cmd == MD_HALT_CLOSE) - return (0); - - if (cmd == MD_HALT_OPEN) - return (0); - - if (cmd == MD_HALT_UNLOAD) - return (0); - - if (cmd == MD_HALT_CHECK) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != stripe_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) - return (1); - } - return (0); - } - - if (cmd != MD_HALT_DOIT) - return (1); - - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != stripe_md_ops.md_selfindex) - continue; - reset_stripe((ms_unit_t *)MD_UNIT(mnum), mnum, 0); - } - - return (0); -} - -/*ARGSUSED3*/ -static int -stripe_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) -{ - minor_t mnum = getminor(*dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - ms_unit_t *un; - int err = 0; - set_t setno; - - /* - * When doing an open of a multi owner metadevice, check to see if this - * node is a starting node and if a reconfig cycle is underway. - * If so, the system isn't sufficiently set up enough to handle the - * open (which involves I/O during sp_validate), so fail with ENXIO. - */ - setno = MD_MIN2SET(mnum); - if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == - (MD_SET_MNSET | MD_SET_MN_START_RC)) { - return (ENXIO); - } - - /* single thread */ - un = (ms_unit_t *)md_unit_openclose_enter(ui); - - /* open devices, if necessary */ - if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) { - if ((err = stripe_open_all_devs(un, md_oflags)) != 0) { - goto out; - } - } - - /* count open */ - if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) - goto out; - - /* unlock, return success */ -out: - md_unit_openclose_exit(ui); - return (err); -} - -/*ARGSUSED1*/ -static int -stripe_close( - dev_t dev, - int flag, - int otyp, - cred_t *cred_p, - int md_cflags -) -{ - minor_t mnum = getminor(dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - ms_unit_t *un; - int err = 0; - - /* single thread */ - un = (ms_unit_t *)md_unit_openclose_enter(ui); - - /* count closed */ - if ((err = md_unit_decopen(mnum, otyp)) != 0) - goto out; - - /* close devices, if necessary */ - if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { - stripe_close_all_devs(un, md_cflags); - } - - /* unlock, return success */ -out: - md_unit_openclose_exit(ui); - return (err); -} - - -static struct buf dumpbuf; - -/* - * This routine dumps memory to the disk. It assumes that the memory has - * already been mapped into mainbus space. It is called at disk interrupt - * priority when the system is in trouble. - * - */ -static int -stripe_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) -{ - ms_unit_t *un; - buf_t *bp; - ms_comp_t *mdc; - u_longlong_t nb; - diskaddr_t mapblk; - int result; - int more; - int saveresult = 0; - - /* - * Don't need to grab the unit lock. - * Cause nothing else is suppose to be happenning. - * Also dump is not suppose to sleep. - */ - un = (ms_unit_t *)MD_UNIT(getminor(dev)); - - if ((diskaddr_t)blkno >= un->c.un_total_blocks) - return (EINVAL); - - if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks) - return (EINVAL); - - bp = &dumpbuf; - nb = ldbtob(nblk); - do { - bzero((caddr_t)bp, sizeof (*bp)); - more = md_mapbuf(un, (diskaddr_t)blkno, nb, bp, &mdc); - nblk = btodb(bp->b_bcount); - mapblk = bp->b_lblkno; - if (!(mdc->un_mirror.ms_flags & MDM_S_NOWRITE)) { - /* - * bdev_dump() is currently only able to take - * 32 bit wide blkno's. - */ - result = bdev_dump(bp->b_edev, addr, (daddr_t)mapblk, - nblk); - if (result) - saveresult = result; - } - - nb -= bp->b_bcount; - addr += bp->b_bcount; - blkno += nblk; - } while (more); - - return (saveresult); -} - -/*ARGSUSED*/ -static intptr_t -stripe_shared_by_blk( - md_dev64_t dev, - void *junk, - diskaddr_t blkno, - u_longlong_t *cnt) -{ - ms_unit_t *un; - buf_t bp; - ms_comp_t *comp; - - un = MD_UNIT(md_getminor(dev)); - (void) md_mapbuf(un, blkno, ldbtob(*cnt), &bp, &comp); - *cnt = (u_longlong_t)lbtodb(bp.b_bcount); - return ((intptr_t)&comp->un_mirror); -} - -/* - * stripe_block_count_skip_size() returns the following values - * so that the logical to physical block mappings can - * be calculated without intimate knowledge of the underpinnings. - * - * block - first logical block number of the device. - * block = [ # of blocks before THE row ] + - * [ # of blocks in THE row before the component ] - * count - # of segments (interlaced size). - * skip - # of logical blocks between segments, or delta to - * get to next segment - * size - interlace size used for the block, count, skip. - */ -/*ARGSUSED*/ -static intptr_t -stripe_block_count_skip_size( - md_dev64_t dev, - void *junk, - int ci, - diskaddr_t *block, - size_t *count, - u_longlong_t *skip, - u_longlong_t *size) -{ - ms_unit_t *un; - int row; - struct ms_row *mdr; - int cmpcount = 0; - - un = MD_UNIT(md_getminor(dev)); - - for (row = 0; row < un->un_nrows; row++) { - mdr = &un->un_row[row]; - if ((mdr->un_ncomp + cmpcount) > ci) - break; - cmpcount += mdr->un_ncomp; - } - ASSERT(row != un->un_nrows); - - /* - * Concatenations are always contiguous blocks, - * you cannot depend on the interlace being a usable - * value (except for stripes). - */ - if (mdr->un_ncomp == 1) { /* Concats */ - *block = mdr->un_cum_blocks - mdr->un_blocks; - *count = 1; - *skip = 0; - *size = mdr->un_blocks; - } else { /* Stripes */ - *block = (mdr->un_cum_blocks - mdr->un_blocks) + - ((ci - cmpcount) * mdr->un_interlace); - *count = (size_t)(mdr->un_blocks / (mdr->un_interlace * - mdr->un_ncomp)); - *skip = (mdr->un_interlace * mdr->un_ncomp) - mdr->un_interlace; - *size = mdr->un_interlace; - } - - return (0); -} - -/*ARGSUSED*/ -static intptr_t -stripe_shared_by_indx(md_dev64_t dev, void *junk, int indx) -{ - ms_unit_t *un; - ms_comp_t *comp; - - un = MD_UNIT(md_getminor(dev)); - comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - comp += indx; - return ((intptr_t)&comp->un_mirror); -} - -/*ARGSUSED*/ -intptr_t -stripe_component_count(md_dev64_t dev, void *junk) -{ - /* - * See comments for stripe_get_dev - */ - - ms_unit_t *un; - int count = 0; - int row; - - un = MD_UNIT(md_getminor(dev)); - for (row = 0; row < un->un_nrows; row++) - count += un->un_row[row].un_ncomp; - return (count); -} - -/*ARGSUSED*/ -intptr_t -stripe_get_dev(md_dev64_t dev, void *junk, int indx, ms_cd_info_t *cd) -{ - /* - * It should be noted that stripe_replace in stripe_ioctl.c calls this - * routine using makedevice(0, minor) for the first argument. - * - * If this routine at some point in the future needs to use the major - * number stripe_replace must be changed. - */ - - ms_unit_t *un; - ms_comp_t *comp; - md_dev64_t tmpdev; - - un = MD_UNIT(md_getminor(dev)); - comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - comp += indx; - tmpdev = comp->un_dev; - /* - * Try to resolve devt again if NODEV64 - * Check if this comp is hotspared and if it is - * then use key for hotspare - */ - if (tmpdev == NODEV64) { - tmpdev = md_resolve_bydevid(md_getminor(dev), tmpdev, - comp->un_mirror.ms_hs_id ? - comp->un_mirror.ms_hs_key : - comp->un_key); - comp->un_dev = tmpdev; - } - - cd->cd_dev = comp->un_dev; - cd->cd_orig_dev = comp->un_mirror.ms_orig_dev; - return (0); -} - -/*ARGSUSED*/ -void -stripe_replace_done(md_dev64_t dev, sv_dev_t *sv) -{ - /* - * See comments for stripe_get_dev - */ - - minor_t mnum = md_getminor(dev); - - if (sv != NULL) { - md_rem_names(sv, 1); - kmem_free(sv, sizeof (sv_dev_t)); - } - - md_unit_writerexit(MDI_UNIT(mnum)); -} - -/*ARGSUSED*/ -intptr_t -stripe_replace_dev(md_dev64_t dev, void *junk, int ci, ms_new_dev_t *nd, - mddb_recid_t *recids, int nrecids, void (**replace_done)(), - void **replace_data) -{ - minor_t mnum; - ms_unit_t *un; - mdi_unit_t *ui; - ms_comp_t *comp; - diskaddr_t dev_size; - int row; - int ncomps = 0; - int cmpcount = 0; - int rid = 0; - struct ms_row *mdr; - sv_dev_t *sv = NULL; - mddb_recid_t hs_id = 0; - set_t setno; - side_t side; - md_dev64_t this_dev; - md_dev64_t old_dev; - - mnum = md_getminor(dev); - ui = MDI_UNIT(mnum); - setno = MD_MIN2SET(mnum); - side = mddb_getsidenum(setno); - - un = md_unit_writerlock(ui); - - *replace_data = NULL; - comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - - comp += ci; - old_dev = comp->un_dev; - - /* - * Count the number of components - */ - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - ncomps += mdr->un_ncomp; - } - - recids[0] = 0; - /* - * No need of checking size of new device, - * when hotsparing (it has already been done), or - * when enabling the device. - */ - if ((nd != NULL) && (nd->nd_hs_id == 0)) { - for (row = 0; row < un->un_nrows; row++) { - mdr = &un->un_row[row]; - if ((mdr->un_ncomp + cmpcount) > ci) - break; - cmpcount += mdr->un_ncomp; - } - ASSERT(row != un->un_nrows); - - /* Concatenations have a ncomp = 1 */ - dev_size = mdr->un_blocks / mdr->un_ncomp; - - /* - * now check to see if new comp can be used in - * place of old comp - */ - if ((un->c.un_flag & MD_LABELED) && (ci == 0) && - nd->nd_labeled) - nd->nd_start_blk = 0; - else - nd->nd_nblks -= nd->nd_start_blk; - - if (dev_size > nd->nd_nblks) { - md_unit_writerexit(ui); - return (MDE_COMP_TOO_SMALL); - } - - sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP); - sv->setno = MD_MIN2SET(mnum); - sv->key = comp->un_key; - } - - /* - * Close this component. - */ - if (comp->un_mirror.ms_flags & MDM_S_ISOPEN) { - md_layered_close(comp->un_dev, MD_OFLG_NULL); - comp->un_mirror.ms_flags &= ~MDM_S_ISOPEN; - } - - /* - * If the component is hotspared, return to the pool. - */ - if (comp->un_mirror.ms_hs_id != 0) { - hs_cmds_t cmd; - mdkey_t hs_key; - - hs_key = comp->un_mirror.ms_hs_key; - comp->un_dev = comp->un_mirror.ms_orig_dev; - comp->un_start_block = comp->un_mirror.ms_orig_blk; - comp->un_mirror.ms_hs_key = 0; - comp->un_mirror.ms_hs_id = 0; - comp->un_mirror.ms_orig_dev = 0; - - cmd = HS_FREE; - if ((comp->un_mirror.ms_state != CS_OKAY) && - (comp->un_mirror.ms_state != CS_RESYNC)) - cmd = HS_BAD; - (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, &hs_id, - &hs_key, NULL, NULL); - } - - /* - * Open by device id; for enable (indicated by a NULL - * nd pointer), use the existing component info. For - * replace, use the new device. - */ - if (nd == NULL) { - this_dev = md_resolve_bydevid(mnum, comp->un_dev, comp->un_key); - /* - * If someone replaced a new disk in the same slot - * we get NODEV64 since old device id cannot be - * resolved. The new devt is obtained from the - * mddb since devt is going to be unchanged for the - * enable case. No need to check for multiple - * keys here because the caller (comp_replace) - * has already sanity checked it for us. - */ - if (this_dev == NODEV64) { - this_dev = md_getdevnum(setno, side, comp->un_key, - MD_TRUST_DEVT); - } - } else { - /* - * If this is a hotspare, save the original dev_t for later - * use. If this has occured during boot then the value of - * comp->un_dev will be NODEV64 because of the failure to look - * up the devid of the device. - */ - if (nd->nd_hs_id != 0) - comp->un_mirror.ms_orig_dev = comp->un_dev; - this_dev = md_resolve_bydevid(mnum, nd->nd_dev, nd->nd_key); - } - - comp->un_dev = this_dev; - - /* - * Now open the new device if required. Note for a single component - * stripe it will not be open - leave this for the mirror driver to - * deal with. - */ - if (md_unit_isopen(ui)) { - if (md_layered_open(mnum, &this_dev, MD_OFLG_NULL)) { - mddb_recid_t ids[3]; - - ids[0] = un->c.un_record_id; - ids[1] = hs_id; - ids[2] = 0; - mddb_commitrecs_wrapper(ids); - if ((nd != NULL) && (nd->nd_hs_id != 0)) { - /* - * Revert back to the original device. - */ - comp->un_dev = comp->un_mirror.ms_orig_dev; - - cmn_err(CE_WARN, - "md: %s: open error of hotspare %s", - md_shortname(mnum), - md_devname(MD_MIN2SET(mnum), nd->nd_dev, - NULL, 0)); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, - SVM_TAG_HS, MD_MIN2SET(mnum), nd->nd_dev); - } - md_unit_writerexit(ui); - return (MDE_COMP_OPEN_ERR); - } - if (nd != NULL) - nd->nd_dev = this_dev; - - comp->un_mirror.ms_flags |= MDM_S_ISOPEN; - } - - if (nd == NULL) { - recids[0] = un->c.un_record_id; - recids[1] = hs_id; - recids[2] = 0; - *replace_done = stripe_replace_done; - return (0); - } - - /* if hot sparing this device */ - if (nd->nd_hs_id != 0) { - char devname[MD_MAX_CTDLEN]; - char hs_devname[MD_MAX_CTDLEN]; - set_t setno; - - comp->un_mirror.ms_hs_id = nd->nd_hs_id; - comp->un_mirror.ms_hs_key = nd->nd_key; - - comp->un_mirror.ms_orig_blk = comp->un_start_block; - - setno = MD_MIN2SET(mnum); - - (void) md_devname(setno, comp->un_mirror.ms_orig_dev, devname, - sizeof (devname)); - (void) md_devname(setno, nd->nd_dev, hs_devname, - sizeof (hs_devname)); - - cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s", - md_shortname(mnum), devname, hs_devname); - - } else { /* replacing the device */ - comp->un_key = nd->nd_key; - *replace_data = (void *)sv; - - /* - * For the old device, make sure to reset the parent - * if it's a metadevice. - */ - if (md_getmajor(comp->un_dev) == md_major) { - minor_t comp_mnum = md_getminor(old_dev); - md_unit_t *comp_un = MD_UNIT(comp_mnum); - - md_reset_parent(old_dev); - recids[rid++] = MD_RECID(comp_un); - } - } - - comp->un_dev = nd->nd_dev; - comp->un_start_block = nd->nd_start_blk; - - /* - * For the new device, make sure to set the parent if it's a - * metadevice. - * - * If we ever support using metadevices as hot spares, this - * will need to be tested, and possibly moved into the - * preceding "else" clause, immediately following the parent - * reset block. For now, it's convenient to leave it here and - * only compress nd->nd_dev once. - */ - if (md_getmajor(comp->un_dev) == md_major) { - minor_t comp_mnum = md_getminor(comp->un_dev); - md_unit_t *comp_un = MD_UNIT(comp_mnum); - - md_set_parent(comp->un_dev, MD_SID(un)); - recids[rid++] = MD_RECID(comp_un); - } - - recids[rid++] = un->c.un_record_id; - recids[rid++] = hs_id; - recids[rid] = 0; - *replace_done = stripe_replace_done; - return (0); -} - -/*ARGSUSED*/ -static intptr_t -stripe_hotspare_dev( - md_dev64_t dev, - void *junk, - int ci, - mddb_recid_t *recids, - int nrecids, - void (**replace_done)(), - void **replace_data) -{ - ms_unit_t *un; - mdi_unit_t *ui; - ms_comp_t *comp; - int row; - struct ms_row *mdr; - ms_new_dev_t nd; - int err; - int i; - minor_t mnum; - set_t setno; - int cmpcount = 0; - - mnum = md_getminor(dev); - ui = MDI_UNIT(mnum); - un = MD_UNIT(mnum); - setno = MD_MIN2SET(mnum); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (1); - - if (un->un_hsp_id == -1) - return (1); - - for (row = 0; row < un->un_nrows; row++) { - mdr = &un->un_row[row]; - if ((mdr->un_ncomp + cmpcount) > ci) - break; - cmpcount += mdr->un_ncomp; - } - ASSERT(row != un->un_nrows); - - comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - comp += ci; - /* Concatenations have a ncomp = 1 */ - nd.nd_nblks = mdr->un_blocks / mdr->un_ncomp; - - if ((un->c.un_flag & MD_LABELED) && (ci == 0)) - nd.nd_labeled = 1; - else - nd.nd_labeled = 0; - -again: - err = md_hot_spare_ifc(HS_GET, un->un_hsp_id, nd.nd_nblks, - nd.nd_labeled, &nd.nd_hs_id, &nd.nd_key, &nd.nd_dev, - &nd.nd_start_blk); - - if (err) { - if (!stripe_replace_dev(dev, junk, ci, NULL, recids, nrecids, - replace_done, replace_data)) { - mddb_commitrecs_wrapper(recids); - md_unit_writerexit(ui); - } - recids[0] = 0; - return (1); - } - - if (stripe_replace_dev(dev, junk, ci, &nd, recids, nrecids, - replace_done, replace_data)) { - - (void) md_hot_spare_ifc(HS_BAD, un->un_hsp_id, 0, 0, - &nd.nd_hs_id, &nd.nd_key, NULL, NULL); - mddb_commitrec_wrapper(nd.nd_hs_id); - goto again; - } - - /* Leave a slot for the null recid */ - for (i = 0; i < (nrecids - 1); i++) { - if (recids[i] == 0) { - recids[i++] = nd.nd_hs_id; - recids[i] = 0; - } - } - return (0); -} - -static int -stripe_imp_set( - set_t setno -) -{ - - mddb_recid_t recid; - int i, row, c, gotsomething; - mddb_type_t typ1; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - ms_unit32_od_t *un32; - ms_unit_t *un64; - md_dev64_t self_devt; - minor_t *self_id; /* minor needs to be updated */ - md_parent_t *parent_id; /* parent needs to be updated */ - mddb_recid_t *record_id; /* record id needs to be updated */ - mddb_recid_t *hsp_id; - ms_comp32_od_t *comp32; - ms_comp_t *comp64; - - - gotsomething = 0; - - typ1 = (mddb_type_t)md_getshared_key(setno, - stripe_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - dep = mddb_getrecdep(recid); - rbp = dep->de_rb; - - switch (rbp->rb_revision) { - case MDDB_REV_RB: - case MDDB_REV_RBFN: - /* - * Small device - */ - un32 = (ms_unit32_od_t *)mddb_getrecaddr(recid); - self_id = &(un32->c.un_self_id); - parent_id = &(un32->c.un_parent); - record_id = &(un32->c.un_record_id); - hsp_id = &(un32->un_hsp_id); - - comp32 = (ms_comp32_od_t *) - ((void *)&((char *)un32)[un32->un_ocomp]); - for (row = 0; row < un32->un_nrows; row++) { - struct ms_row32_od *mdr = &un32->un_row[row]; - for (i = 0, c = mdr->un_icomp; - i < mdr->un_ncomp; i++) { - ms_comp32_od_t *mdc; - - mdc = &comp32[c++]; - - if (!md_update_minor(setno, - mddb_getsidenum(setno), - mdc->un_key)) - goto out; - - if (mdc->un_mirror.ms_hs_id != 0) - mdc->un_mirror.ms_hs_id = - MAKERECID(setno, - mdc->un_mirror.ms_hs_id); - } - } - break; - case MDDB_REV_RB64: - case MDDB_REV_RB64FN: - un64 = (ms_unit_t *)mddb_getrecaddr(recid); - self_id = &(un64->c.un_self_id); - parent_id = &(un64->c.un_parent); - record_id = &(un64->c.un_record_id); - hsp_id = &(un64->un_hsp_id); - - comp64 = (ms_comp_t *) - ((void *)&((char *)un64)[un64->un_ocomp]); - for (row = 0; row < un64->un_nrows; row++) { - struct ms_row *mdr = &un64->un_row[row]; - - for (i = 0, c = mdr->un_icomp; - i < mdr->un_ncomp; i++) { - ms_comp_t *mdc; - - mdc = &comp64[c++]; - - if (!md_update_minor(setno, - mddb_getsidenum(setno), - mdc->un_key)) - goto out; - - if (mdc->un_mirror.ms_hs_id != 0) - mdc->un_mirror.ms_hs_id = - MAKERECID(setno, - mdc->un_mirror.ms_hs_id); - } - } - break; - } - - /* - * If this is a top level and a friendly name metadevice, - * update its minor in the namespace. - */ - if ((*parent_id == MD_NO_PARENT) && - ((rbp->rb_revision == MDDB_REV_RBFN) || - (rbp->rb_revision == MDDB_REV_RB64FN))) { - - self_devt = md_makedevice(md_major, *self_id); - if (!md_update_top_device_minor(setno, - mddb_getsidenum(setno), self_devt)) - goto out; - } - - /* - * Update unit with the imported setno - * - */ - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); - - if (*hsp_id != -1) - *hsp_id = MAKERECID(setno, DBID(*hsp_id)); - - if (*parent_id != MD_NO_PARENT) - *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); - *record_id = MAKERECID(setno, DBID(*record_id)); - - gotsomething = 1; - } - -out: - return (gotsomething); -} - -static md_named_services_t stripe_named_services[] = { - {stripe_shared_by_blk, "shared by blk" }, - {stripe_shared_by_indx, "shared by indx" }, - {stripe_component_count, "get component count" }, - {stripe_block_count_skip_size, "get block count skip size" }, - {stripe_get_dev, "get device" }, - {stripe_replace_dev, "replace device" }, - {stripe_hotspare_dev, "hotspare device" }, - {stripe_rename_check, MDRNM_CHECK }, - {NULL, 0} -}; - -md_ops_t stripe_md_ops = { - stripe_open, /* open */ - stripe_close, /* close */ - md_stripe_strategy, /* strategy */ - NULL, /* print */ - stripe_dump, /* dump */ - NULL, /* read */ - NULL, /* write */ - md_stripe_ioctl, /* stripe_ioctl, */ - stripe_snarf, /* stripe_snarf */ - stripe_halt, /* stripe_halt */ - NULL, /* aread */ - NULL, /* awrite */ - stripe_imp_set, /* import set */ - stripe_named_services -}; - -static void -init_init() -{ - md_stripe_mcs_buf_off = sizeof (md_scs_t) - sizeof (buf_t); - - stripe_parent_cache = kmem_cache_create("md_stripe_parent", - sizeof (md_sps_t), 0, stripe_parent_constructor, - stripe_parent_destructor, stripe_run_queue, NULL, NULL, - 0); - stripe_child_cache = kmem_cache_create("md_stripe_child", - sizeof (md_scs_t) - sizeof (buf_t) + biosize(), 0, - stripe_child_constructor, stripe_child_destructor, - stripe_run_queue, NULL, NULL, 0); -} - -static void -fini_uninit() -{ - kmem_cache_destroy(stripe_parent_cache); - kmem_cache_destroy(stripe_child_cache); - stripe_parent_cache = stripe_child_cache = NULL; -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("stripes module", init_init(), fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/stripe/stripe_ioctl.c b/usr/src/uts/common/io/lvm/stripe/stripe_ioctl.c deleted file mode 100644 index c790495e8c99..000000000000 --- a/usr/src/uts/common/io/lvm/stripe/stripe_ioctl.c +++ /dev/null @@ -1,1416 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern int md_status; - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern md_ops_t stripe_md_ops; -extern md_krwlock_t md_unit_array_rw; -extern major_t md_major; - -static int -stripe_replace(replace_params_t *params) -{ - minor_t mnum = params->mnum; - ms_unit_t *un; - mddb_recid_t recids[6]; - ms_new_dev_t nd; - ms_cd_info_t cd; - int ci; - int cmpcnt; - void *repl_data; - md_dev64_t fake_devt; - void (*repl_done)(); - - mdclrerror(¶ms->mde); - - un = (ms_unit_t *)MD_UNIT(mnum); - - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { - return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - nd.nd_dev = params->new_dev; - nd.nd_key = params->new_key; - nd.nd_nblks = params->number_blks; - nd.nd_start_blk = params->start_blk; - nd.nd_labeled = params->has_label; - nd.nd_hs_id = 0; - - /* - * stripe_component_count and stripe_get_dev only care about the - * minor number associated with the first argument which is a - * md_dev64_t - * - * The comments section for these two routines have been updated - * to indicate that this routine calls with fake major numbers. - */ - fake_devt = md_makedevice(0, mnum); - cmpcnt = stripe_component_count(fake_devt, NULL); - for (ci = 0; ci < cmpcnt; ci++) { - (void) stripe_get_dev(fake_devt, NULL, ci, &cd); - if ((cd.cd_dev == params->old_dev) || - (cd.cd_orig_dev == params->old_dev)) - break; - } - if (ci == cmpcnt) { - return (EINVAL); - } - - /* In case of a dryrun we're done here */ - if (params->options & MDIOCTL_DRYRUN) { - return (0); - } - - (void) stripe_replace_dev(fake_devt, 0, ci, &nd, recids, 6, - &repl_done, &repl_data); - mddb_commitrecs_wrapper(recids); - (*repl_done)(fake_devt, repl_data); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (0); -} - -static int -stripe_set(void *d, int mode) -{ - minor_t mnum; - ms_unit_t *un; - void *p; - mddb_recid_t ms_recid; - mddb_recid_t *recids; - mddb_type_t typ1; - int err; - set_t setno; - md_error_t *mdep; - struct ms_comp *mdcomp; - int row; - int rid; - int num_recs; - int i, c; - md_set_params_t *msp = d; - - mnum = msp->mnum; - setno = MD_MIN2SET(mnum); - - mdep = &msp->mde; - - mdclrerror(mdep); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - } - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); - - un = MD_UNIT(mnum); - if (un != NULL) { - return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum)); - } - - - typ1 = (mddb_type_t)md_getshared_key(setno, - stripe_md_ops.md_driver.md_drivername); - - /* create the db record for this mdstruct */ - if (msp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum)); -#else - ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, - MD_CRO_64BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); -#endif - } else { - ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, - MD_CRO_32BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); - } - if (ms_recid < 0) - return (mddbstatus2error(mdep, ms_recid, mnum, setno)); - - /* get the address of the mdstruct */ - p = (void *) mddb_getrecaddr(ms_recid); - /* - * It is okay that we muck with the mdstruct here, - * since no one else will know about the mdstruct - * until we commit it. If we crash, the record will - * be automatically purged, since we haven't - * committed it yet. - */ - - /* copy in the user's mdstruct */ - if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, (caddr_t)p, - (size_t)msp->size, mode)) { - mddb_deleterec_wrapper(ms_recid); - return (EFAULT); - } - - un = (ms_unit_t *)p; - - /* All 64 bit metadevices only support EFI labels. */ - if (msp->options & MD_CRO_64BIT) { - un->c.un_flag |= MD_EFILABEL; - } - - /* - * allocate the real recids array. since we may have to commit - * underlying metadevice records, we need an array - * of size: total number of components in stripe + 3 - * (1 for the stripe itself, one for the hotspare, one - * for the end marker). - */ - num_recs = 3; - rid = 0; - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - num_recs += mdr->un_ncomp; - } - recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); - recids[rid++] = ms_recid; - - MD_SID(un) = mnum; - MD_RECID(un) = recids[0]; - MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SUB_MIRROR | MD_CAN_SP; - MD_PARENT(un) = MD_NO_PARENT; - un->c.un_revision |= MD_FN_META_DEV; - - if (err = stripe_build_incore(p, 0)) { - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(recids[0]); - kmem_free(recids, num_recs * sizeof (mddb_recid_t)); - return (err); - } - - /* - * Update unit availability - */ - md_set[setno].s_un_avail--; - - recids[rid] = 0; - if (un->un_hsp_id != -1) - err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0, - &recids[rid++], NULL, NULL, NULL); - - - if (err) { - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - mddb_deleterec_wrapper(recids[0]); - kmem_free(recids, num_recs * sizeof (mddb_recid_t)); - return (mdhsperror(mdep, MDE_INVAL_HSP, un->un_hsp_id)); - } - - /* - * set the parent on any metadevice components. - * NOTE: currently soft partitions are the only metadevices - * which can appear within a stripe. - */ - mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]); - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - ms_comp_t *mdc = &mdcomp[c++]; - md_dev64_t comp_dev; - md_unit_t *comp_un; - - comp_dev = mdc->un_dev; - if (md_getmajor(comp_dev) == md_major) { - /* set parent and disallow soft partitioning */ - comp_un = MD_UNIT(md_getminor(comp_dev)); - recids[rid++] = MD_RECID(comp_un); - md_set_parent(mdc->un_dev, MD_SID(un)); - } - } - } - - /* set end marker */ - recids[rid] = 0; - mddb_commitrecs_wrapper(recids); - - md_create_unit_incore(mnum, &stripe_md_ops, 0); - kmem_free(recids, (num_recs * sizeof (mddb_recid_t))); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - return (0); -} - - -/*ARGSUSED*/ -static int -stripe_get(void *d, int mode, IOLOCK *lock) -{ - minor_t mnum; - mdi_unit_t *ui; - ms_unit_t *un; - md_error_t *mdep; - md_i_get_t *migp = d; - - - mnum = migp->id; - mdep = &migp->mde; - - mdclrerror(mdep); - - if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - } - - un = (ms_unit_t *)md_ioctl_readerlock(lock, ui); - - if (migp->size == 0) { - migp->size = un->c.un_size; - return (0); - } - - if (migp->size < un->c.un_size) { - return (EFAULT); - } - - if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, - un->c.un_size, mode)) - return (EFAULT); - return (0); -} - -static int -stripe_reset(md_i_reset_t *mirp) -{ - minor_t mnum = mirp->mnum; - ms_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&mirp->mde); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(&mirp->mde, MDE_INVAL_UNIT, mnum)); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(&mirp->mde, MDE_DB_STALE, mnum, setno)); - - un = MD_UNIT(mnum); - if (un == NULL) { - return (mdmderror(&mirp->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - - /* This prevents new opens */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - - if (MD_HAS_PARENT(un->c.un_parent)) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); - } - - /* single thread */ - ui = MDI_UNIT(mnum); - un = md_unit_openclose_enter(ui); - - if (md_unit_isopen(ui)) { - md_unit_openclose_exit(ui); - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); - } - - md_unit_openclose_exit(ui); - reset_stripe(un, mnum, 1); - - /* - * Update unit availability - */ - md_set[setno].s_un_avail++; - - /* - * If MN set, reset s_un_next so all nodes can have - * the same view of the next available slot when - * nodes are -w and -j - */ - if (MD_MNSET_SETNO(setno)) { - (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); - } - - rw_exit(&md_unit_array_rw.lock); - return (0); -} - -static int -stripe_grow(void *d, int mode, IOLOCK *lockp) -{ - minor_t mnum; - ms_unit_t *un, *new_un; - mdi_unit_t *ui; - minor_t *par = NULL; - IOLOCK *plock = NULL; - ms_comp_t *mdcomp, *new_comp; - int row, i, c; - mddb_recid_t ms_recid; - mddb_recid_t old_vtoc = 0; - mddb_recid_t *recids; - md_create_rec_option_t options; - mddb_type_t typ1; - int err; - int64_t tb, atb; - uint_t nr, oc; - int opened; - int rval = 0; - set_t setno; - md_error_t *mdep; - int npar; - int rid; - int num_recs; - u_longlong_t rev; - md_grow_params_t *mgp = d; - - - mnum = mgp->mnum; - mdep = &mgp->mde; - setno = MD_MIN2SET(mnum); - npar = mgp->npar; - - mdclrerror(mdep); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); - - ui = MDI_UNIT(mnum); - if (ui == NULL) { - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - } - - if (npar >= 1) { - ASSERT((minor_t *)(uintptr_t)mgp->par != NULL); - par = kmem_alloc(npar * sizeof (*par), KM_SLEEP); - plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP); - if (ddi_copyin((caddr_t)(uintptr_t)mgp->par, (caddr_t)par, - (npar * sizeof (*par)), mode) != 0) { - kmem_free(par, npar * sizeof (*par)); - kmem_free(plock, npar * sizeof (*plock)); - return (EFAULT); - } - } - - /* - * we grab unit reader/writer first, then parent locks, - * then our own. - * we expect parent units to be sorted to avoid deadlock - */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - for (i = 0; i < npar; ++i) { - (void) md_ioctl_writerlock(&plock[i], - MDI_UNIT(par[i])); - } - un = (ms_unit_t *)md_ioctl_writerlock(lockp, ui); - - if (un->un_nrows != mgp->nrows) { - rval = EINVAL; - goto out; - } - - typ1 = (mddb_type_t)md_getshared_key(setno, - stripe_md_ops.md_driver.md_drivername); - - /* - * Preserve the friendly name nature of growing device. - */ - options = MD_CRO_STRIPE; - if (un->c.un_revision & MD_FN_META_DEV) - options |= MD_CRO_FN; - if (mgp->options & MD_CRO_64BIT) { -#if defined(_ILP32) - rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum); - goto out; -#else - ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0, - MD_CRO_64BIT | options, setno); -#endif - } else { - ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0, - MD_CRO_32BIT | options, setno); - } - - - if (ms_recid < 0) { - rval = mddbstatus2error(mdep, (int)ms_recid, mnum, setno); - goto out; - } - - /* get the address of the new unit */ - new_un = (ms_unit_t *)mddb_getrecaddr(ms_recid); - - /* - * It is okay that we muck with the new unit here, - * since no one else will know about the unit struct - * until we commit it. If we crash, the record will - * be automatically purged, since we haven't - * committed it yet and the old unit struct will be found. - */ - - /* copy in the user's unit struct */ - err = ddi_copyin((caddr_t)(uintptr_t)mgp->mdp, (caddr_t)new_un, - (size_t)mgp->size, mode); - if (err) { - mddb_deleterec_wrapper(ms_recid); - rval = EFAULT; - goto out; - } - if (options & MD_CRO_FN) - new_un->c.un_revision |= MD_FN_META_DEV; - - /* - * allocate the real recids array. since we may have to - * commit underlying metadevice records, we need an - * array of size: total number of new components being - * attached + 2 (one for the stripe itself, one for the - * end marker). - */ - num_recs = 2; - rid = 0; - for (row = 0; row < new_un->un_nrows; row++) { - struct ms_row *mdr = &new_un->un_row[row]; - num_recs += mdr->un_ncomp; - } - recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); - recids[rid++] = ms_recid; - - /* - * Save a few of the new unit structs fields. - * Before they get clobbered. - */ - tb = new_un->c.un_total_blocks; - atb = new_un->c.un_actual_tb; - nr = new_un->un_nrows; - oc = new_un->un_ocomp; - rev = new_un->c.un_revision; - - /* - * Copy the old unit struct (static stuff) - * into new unit struct - */ - bcopy((caddr_t)un, (caddr_t)new_un, - sizeof (ms_unit_t) + ((nr - 2) * (sizeof (struct ms_row)))); - - /* - * Restore the saved stuff. - */ - new_un->c.un_total_blocks = tb; - md_nblocks_set(mnum, new_un->c.un_total_blocks); - new_un->c.un_actual_tb = atb; - new_un->un_nrows = nr; - new_un->un_ocomp = oc; - new_un->c.un_revision = rev; - - new_un->c.un_record_id = ms_recid; - new_un->c.un_size = mgp->size; - - /* All 64 bit metadevices only support EFI labels. */ - if (mgp->options & MD_CRO_64BIT) { - new_un->c.un_flag |= MD_EFILABEL; - /* - * If the device was previously smaller than a terabyte, - * and had a vtoc record attached to it, we remove the - * vtoc record, because the layout has changed completely. - */ - if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) && - (un->c.un_vtoc_id != 0)) { - old_vtoc = un->c.un_vtoc_id; - new_un->c.un_vtoc_id = - md_vtoc_to_efi_record(old_vtoc, setno); - } - } - - /* - * Copy the old component structs into the new unit struct. - */ - mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]); - new_comp = (ms_comp_t *)((void *)&((char *)new_un)[new_un->un_ocomp]); - for (row = 0; row < un->un_nrows; row++) { - struct ms_row *mdr = &un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++, c++) { - bcopy((caddr_t)&mdcomp[c], (caddr_t)&new_comp[c], - sizeof (ms_comp_t)); - } - } - - opened = md_unit_isopen(ui); - - /* - * Set parent on metadevices being added. - * Open the new devices being added. - * NOTE: currently soft partitions are the only metadevices - * which can appear within a stripe. - */ - for (row = un->un_nrows; row < new_un->un_nrows; row++) { - struct ms_row *mdr = &new_un->un_row[row]; - for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { - struct ms_comp *mdc = &new_comp[c++]; - md_dev64_t comp_dev; - md_unit_t *comp_un; - - comp_dev = mdc->un_dev; - /* set parent on any metadevices */ - if (md_getmajor(comp_dev) == md_major) { - comp_un = MD_UNIT(md_getminor(comp_dev)); - recids[rid++] = MD_RECID(comp_un); - md_set_parent(comp_dev, MD_SID(new_un)); - } - - if (opened) { - md_dev64_t tmpdev = mdc->un_dev; - /* - * Open by device id - * Check if this comp is hotspared and - * if it is then use the key for hotspare - */ - tmpdev = md_resolve_bydevid(mnum, tmpdev, - mdc->un_mirror.ms_hs_id ? - mdc->un_mirror.ms_hs_key : mdc->un_key); - (void) md_layered_open(mnum, &tmpdev, - MD_OFLG_NULL); - mdc->un_dev = tmpdev; - mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; - } - } - } - - /* set end marker */ - recids[rid] = 0; - /* commit new unit struct */ - mddb_commitrecs_wrapper(recids); - - /* delete old unit struct */ - mddb_deleterec_wrapper(un->c.un_record_id); - - /* place new unit in in-core array */ - md_nblocks_set(mnum, new_un->c.un_total_blocks); - MD_UNIT(mnum) = new_un; - - /* - * If old_vtoc has a non zero value, we know: - * - This unit crossed the border from smaller to larger one TB - * - There was a vtoc record for the unit, - * - This vtoc record is no longer needed, because - * a new efi record has been created for this un. - */ - if (old_vtoc != 0) { - mddb_deleterec_wrapper(old_vtoc); - } - - /* free recids array */ - kmem_free(recids, num_recs * sizeof (mddb_recid_t)); - - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE, - MD_UN2SET(new_un), MD_SID(new_un)); - - /* release locks, return success */ -out: - for (i = npar - 1; (i >= 0); --i) - md_ioctl_writerexit(&plock[i]); - rw_exit(&md_unit_array_rw.lock); - if (plock != NULL) - kmem_free(plock, npar * sizeof (*plock)); - if (par != NULL) - kmem_free(par, npar * sizeof (*par)); - return (rval); -} - -static int -stripe_get_geom( - ms_unit_t *un, - struct dk_geom *geomp -) -{ - md_get_geom((md_unit_t *)un, geomp); - - return (0); -} - -static int -stripe_get_vtoc( - ms_unit_t *un, - struct vtoc *vtocp -) -{ - md_get_vtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -stripe_set_vtoc( - ms_unit_t *un, - struct vtoc *vtocp -) -{ - return (md_set_vtoc((md_unit_t *)un, vtocp)); -} - -static int -stripe_get_extvtoc( - ms_unit_t *un, - struct extvtoc *vtocp -) -{ - md_get_extvtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -stripe_set_extvtoc( - ms_unit_t *un, - struct extvtoc *vtocp -) -{ - return (md_set_extvtoc((md_unit_t *)un, vtocp)); -} - -static int -stripe_get_cgapart( - ms_unit_t *un, - struct dk_map *dkmapp -) -{ - md_get_cgapart((md_unit_t *)un, dkmapp); - return (0); -} - -static int -stripe_getdevs( - void *d, - int mode, - IOLOCK *lock -) -{ - minor_t mnum; - mdi_unit_t *ui; - ms_unit_t *un; - struct ms_row *mdr; - ms_comp_t *mdcomp, *mdc; - int r, c, i; - int cnt; - md_error_t *mdep; - md_dev64_t *devsp; - md_dev64_t unit_dev; - md_getdevs_params_t *mgdp = d; - - - mnum = mgdp->mnum; - mdep = &mgdp->mde; - - /* check out unit */ - mdclrerror(mdep); - - if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); - - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); - } - - un = (ms_unit_t *)md_ioctl_readerlock(lock, ui); - - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - devsp = (md_dev64_t *)(uintptr_t)mgdp->devs; - - for (cnt = 0, r = 0; (r < un->un_nrows); ++r) { - mdr = &un->un_row[r]; - for (c = 0, i = mdr->un_icomp; (c < mdr->un_ncomp); ++c) { - mdc = &mdcomp[i++]; - if (cnt < mgdp->cnt) { - unit_dev = mdc->un_dev; - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ - (unit_dev)) == NODEV64) - return (ENODEV); - } - - if (ddi_copyout((caddr_t)&unit_dev, devsp, - sizeof (*devsp), mode) != 0) - return (EFAULT); - ++devsp; - } - ++cnt; - } - } - mgdp->cnt = cnt; - return (0); -} - -static int -stripe_change( - md_stripe_params_t *msp, - IOLOCK *lock -) -{ - ms_params_t *pp = &msp->params; - minor_t mnum = msp->mnum; - ms_unit_t *un; - mdi_unit_t *ui; - int r, c, i; - struct ms_row *mdr; - ms_comp_t *mdcomp, *mdc; - mddb_recid_t recids[4]; - int irecid; - int inc_new_hsp = 0; - int err; - set_t setno = MD_MIN2SET(mnum); - - mdclrerror(&msp->mde); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) - return (mdmderror(&msp->mde, MDE_INVAL_UNIT, mnum)); - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (mdmddberror(&msp->mde, MDE_DB_STALE, mnum, setno)); - - if ((ui = MDI_UNIT(mnum)) == NULL) { - return (mdmderror(&msp->mde, MDE_UNIT_NOT_SETUP, mnum)); - } - - if (!pp->change_hsp_id) - return (0); - - un = (ms_unit_t *)md_ioctl_writerlock(lock, ui); - - /* verify that no hot spares are in use */ - mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); - for (r = 0; r < un->un_nrows; r++) { - mdr = &un->un_row[r]; - for (c = 0, i = mdr->un_icomp; c < mdr->un_ncomp; c++) { - mdc = &mdcomp[i++]; - if (mdc->un_mirror.ms_hs_id != 0) { - return (mdmderror(&msp->mde, MDE_HS_IN_USE, - mnum)); - } - } - } - - recids[1] = 0; - recids[2] = 0; - irecid = 1; - if (pp->hsp_id != -1) { - /* increment the reference count of the new hsp */ - err = md_hot_spare_ifc(HSP_INCREF, pp->hsp_id, 0, 0, - &recids[1], NULL, NULL, NULL); - if (err) { - return (mdhsperror(&msp->mde, MDE_INVAL_HSP, - pp->hsp_id)); - } - inc_new_hsp = 1; - irecid++; - } - - if (un->un_hsp_id != -1) { - /* decrement the reference count of the old hsp */ - err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, - &recids[irecid], NULL, NULL, NULL); - if (err) { - err = mdhsperror(&msp->mde, MDE_INVAL_HSP, - pp->hsp_id); - if (inc_new_hsp) { - (void) md_hot_spare_ifc(HSP_DECREF, - pp->hsp_id, 0, 0, - &recids[1], NULL, NULL, NULL); - /* - * Don't need to commit the record, - * cause it never got commit before - */ - } - return (err); - } - } - - un->un_hsp_id = pp->hsp_id; - - recids[0] = un->c.un_record_id; - recids[3] = 0; - mddb_commitrecs_wrapper(recids); - SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - - return (0); -} - -static int -stripe_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - - /* handle ioctl */ - switch (cmd) { - - case MD_IOCSET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (struct md_set_params); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_set(d, mode); - break; - } - - case MD_IOCGET: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (struct md_i_get); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_get(d, mode, lockp); - break; - } - - case MD_IOCRESET: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_reset_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_reset((md_i_reset_t *)d); - break; - } - - case MD_IOCGROW: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (struct md_grow_params); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_grow(d, mode, lockp); - break; - } - - case MD_IOCGET_DEVS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (struct md_getdevs_params); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_getdevs(d, mode, lockp); - break; - } - - case MD_IOCCHANGE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_stripe_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_change((md_stripe_params_t *)d, lockp); - break; - } - - case MD_IOCREPLACE: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (replace_params_t); - d = kmem_alloc(sz, KM_SLEEP); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = stripe_replace((replace_params_t *)d); - break; - } - - case MD_IOCPROBE_DEV: - { - /* - * Ignore the request since stripe is not - * a type of 'redundant' metadevice - */ - break; - } - - default: - return (ENOTTY); - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - kmem_free(d, sz); - } - return (err); -} - -/* - * The parameters of md_stripe_ioctl are defined by the ddi and so - * dev is of type dev_t and not md_dev64_t - */ -int -md_stripe_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp) -{ - minor_t mnum = getminor(dev); - ms_unit_t *un; - int err = 0; - - /* handle admin ioctls */ - if (mnum == MD_ADM_MINOR) - return (stripe_admin_ioctl(cmd, data, mode, lockp)); - - /* check unit */ - if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((un = MD_UNIT(mnum)) == NULL)) - return (ENXIO); - - /* is this a supported ioctl? */ - err = md_check_ioctl_against_unit(cmd, un->c); - if (err != 0) { - return (err); - } - - /* handle ioctl */ - switch (cmd) { - - case DKIOCINFO: - { - struct dk_cinfo *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - get_info(p, mnum); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) - err = EFAULT; - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGMEDIAINFO: - { - struct dk_minfo p; - - if (! (mode & FREAD)) - return (EACCES); - - get_minfo(&p, mnum); - if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) - err = EFAULT; - - return (err); - } - - case DKIOCGGEOM: - { - struct dk_geom *p; - - if (! (mode & FREAD)) - return (EACCES); - - p = kmem_alloc(sizeof (*p), KM_SLEEP); - - if ((err = stripe_get_geom(un, p)) == 0) { - if (ddi_copyout((caddr_t)p, data, sizeof (*p), - mode) != 0) - err = EFAULT; - } - - kmem_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FREAD)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((err = stripe_get_vtoc(un, vtoc)) != 0) { - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - vtoctovtoc32((*vtoc), (*vtoc32)); - if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) - err = EFAULT; - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCSVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { - err = EFAULT; - } - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { - err = EFAULT; - } else { - vtoc32tovtoc((*vtoc32), (*vtoc)); - } - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - if (err == 0) { - err = stripe_set_vtoc(un, vtoc); - } - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - - case DKIOCGEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FREAD)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if ((err = stripe_get_extvtoc(un, extvtoc)) != 0) { - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) - err = EFAULT; - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCSEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { - err = EFAULT; - } - - if (err == 0) { - err = stripe_set_extvtoc(un, extvtoc); - } - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCGAPART: - { - struct dk_map dmp; - - if ((err = stripe_get_cgapart(un, &dmp)) != 0) { - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), - mode) != 0) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct dk_map32 dmp32; - - dmp32.dkl_cylno = dmp.dkl_cylno; - dmp32.dkl_nblk = dmp.dkl_nblk; - - if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), - mode) != 0) - err = EFAULT; - } -#endif /* _SYSCALL32 */ - - return (err); - } - case DKIOCGETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocgetefi(mnum, data, mode)); - } - case DKIOCSETEFI: - { - /* - * This one can be done centralized, - * no need to put in the same code for all types of metadevices - */ - return (md_dkiocsetefi(mnum, data, mode)); - } - case DKIOCPARTITION: - { - return (md_dkiocpartition(mnum, data, mode)); - } - - default: - return (ENOTTY); - } -} - -/* - * rename named service entry points and support functions - */ - -/* - * rename/exchange role swap functions are handled generically - */ - -/* - * support routine for MDRNM_CHECK - */ -static int -stripe_may_renexch_self( - ms_unit_t *un, - mdi_unit_t *ui, - md_rentxn_t *rtxnp) -{ - minor_t from_min; - minor_t to_min; - bool_t toplevel; - bool_t related; - - ASSERT(rtxnp); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - if (!un || !ui) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD)); - if (MD_CAPAB(un) & MD_CAN_META_CHILD) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - if (MD_PARENT(un) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - toplevel = !MD_HAS_PARENT(MD_PARENT(un)); - - /* we're related if trying to swap with our parent */ - related = (!toplevel) && (MD_PARENT(un) == to_min); - - switch (rtxnp->op) { - case MDRNOP_EXCHANGE: - - if (!related) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_TARGET_UNRELATED, to_min); - return (EINVAL); - } - - break; - - case MDRNOP_RENAME: - /* - * if from is top-level and is open, then the kernel is using - * the device and we return EBUSY. - */ - - if (toplevel && md_unit_isopen(ui)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, - from_min); - return (EBUSY); - } - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - return (0); /* ok */ -} - -/* - * Named service entry point: MDRNM_CHECK - */ -intptr_t -stripe_rename_check( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - int err = 0; - - ASSERT(delta); - ASSERT(rtxnp); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (MDRNOP_EXCHANGE == rtxnp->op)); - - if (!delta || !rtxnp || !delta->uip || !delta->unp) { - (void) mdsyserror(&rtxnp->mde, EINVAL); - return (EINVAL); - } - - /* self does additional checks */ - if (delta->old_role == MDRR_SELF) { - err = stripe_may_renexch_self((ms_unit_t *)delta->unp, - delta->uip, rtxnp); - } -out: - return (err); -} -/* end of rename/exchange */ diff --git a/usr/src/uts/common/io/lvm/trans/mdtrans.c b/usr/src/uts/common/io/lvm/trans/mdtrans.c deleted file mode 100644 index cfa0c1ca7cff..000000000000 --- a/usr/src/uts/common/io/lvm/trans/mdtrans.c +++ /dev/null @@ -1,1263 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - * Copyright 2012 Milan Jurik. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -md_ops_t trans_md_ops; -#ifndef lint -md_ops_t *md_interface_ops = &trans_md_ops; -#endif /* lint */ - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; -extern int md_status; -extern major_t md_major; - -extern int md_trans_ioctl(dev_t, int, void *, int, IOLOCK *); -extern md_krwlock_t md_unit_array_rw; - -extern mdq_anchor_t md_done_daemon; - -extern int md_in_upgrade; - -static kmem_cache_t *trans_parent_cache = NULL; -kmem_cache_t *trans_child_cache = NULL; - -#ifdef DEBUG -/* - * ROUTINES FOR TESTING: - */ -static int -_init_debug() -{ - extern int _init_ioctl(); - - return (_init_ioctl()); -} -static int -_fini_debug() -{ - extern int _fini_ioctl(); - int err = 0; - - err = _fini_ioctl(); - return (err); -} - -#endif /* DEBUG */ - -/* - * BEGIN RELEASE DEBUG - * The following routines remain in the released product for testability - */ -int -trans_done_shadow(buf_t *bp) -{ - buf_t *pb; - md_tps_t *ps = (md_tps_t *)bp->b_chain; - int rv = 0; - - pb = ps->ps_bp; - mutex_enter(&ps->ps_mx); - ps->ps_count--; - if (ps->ps_count > 0) { - if ((bp->b_flags & B_ERROR) != 0) { - pb->b_flags |= B_ERROR; - pb->b_error = bp->b_error; - } - mutex_exit(&ps->ps_mx); - kmem_cache_free(trans_child_cache, bp); - } else { - mutex_exit(&ps->ps_mx); - mutex_destroy(&ps->ps_mx); - rv = trans_done(bp); - } - return (rv); -} - -static void -shadow_debug(mt_unit_t *un, /* trans unit info */ - buf_t *pb, /* primary buffer */ - md_tps_t *ps, /* trans parent save */ - buf_t *cb, /* buffer for writing to master */ - int flag, - void *private) -{ - buf_t *sb; /* Shadow buffer */ - - mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL); - ps->ps_count = 2; /* Write child buffer & shadow */ - cb->b_iodone = trans_done_shadow; - sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); - trans_child_init(sb); - sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev), - pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP); - - sb->b_flags |= B_ASYNC; - sb->b_chain = (void *)ps; - md_call_strategy(sb, flag | MD_STR_MAPPED, private); -} -/* - * END RELEASE DEBUG - */ - -/* - * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks) - */ -void * -md_trans_zalloc(size_t nb) -{ - TRANSSTATS(ts_trans_zalloc); - TRANSSTATSADD(ts_trans_alloced, nb); - return (kmem_zalloc(nb, KM_SLEEP)); -} -void * -md_trans_alloc(size_t nb) -{ - TRANSSTATS(ts_trans_alloc); - TRANSSTATSADD(ts_trans_alloced, nb); - return (kmem_alloc(nb, KM_SLEEP)); -} -void -md_trans_free(void *va, size_t nb) -{ - TRANSSTATS(ts_trans_free); - TRANSSTATSADD(ts_trans_freed, nb); - if (nb) - kmem_free(va, nb); -} - -static void -trans_parent_init(md_tps_t *ps) -{ - bzero(ps, sizeof (md_tps_t)); -} - -/*ARGSUSED1*/ -int -trans_child_constructor(void *p, void *d1, int d2) -{ - bioinit(p); - return (0); -} - -void -trans_child_init(struct buf *bp) -{ - md_bioreset(bp); -} - -/*ARGSUSED1*/ -void -trans_child_destructor(void *p, void *d) -{ - biofini(p); -} - -void -trans_commit(mt_unit_t *un, int domstr) -{ - mddb_recid_t recids[4]; - md_unit_t *su; - int ri = 0; - - if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) - return; - - recids[ri++] = un->c.un_record_id; - - if (domstr) - if (md_getmajor(un->un_m_dev) == md_major) { - su = MD_UNIT(md_getminor(un->un_m_dev)); - recids[ri++] = su->c.un_record_id; - } - - if (ri == 0) - return; - recids[ri] = 0; - - uniqtime32(&un->un_timestamp); - mddb_commitrecs_wrapper(recids); -} - -void -trans_close_all_devs(mt_unit_t *un) -{ - if ((un->un_flags & TRANS_NEED_OPEN) == 0) { - md_layered_close(un->un_m_dev, MD_OFLG_NULL); - if (un->un_l_unit) - ldl_close_dev(un->un_l_unit); - un->un_flags |= TRANS_NEED_OPEN; - } -} - -int -trans_open_all_devs(mt_unit_t *un) -{ - int err; - minor_t mnum = MD_SID(un); - md_dev64_t tmpdev = un->un_m_dev; - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - - /* - * Do the open by device id if it is regular device - */ - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, un->un_m_key) == 1) { - tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key); - } - err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL); - un->un_m_dev = tmpdev; - - if (err) - return (ENXIO); - - if (un->un_l_unit) { - err = ldl_open_dev(un, un->un_l_unit); - if (err) { - md_layered_close(tmpdev, MD_OFLG_NULL); - return (ENXIO); - } - } - return (0); -} - -uint_t mt_debug = 0; - -int -trans_build_incore(void *p, int snarfing) -{ - mt_unit_t *un = (mt_unit_t *)p; - minor_t mnum; - set_t setno; - - /* - * initialize debug mode and always start with no shadowing. - */ - if (!snarfing) - un->un_debug = mt_debug; - un->un_s_dev = NODEV64; - - mnum = MD_SID(un); - - if (MD_UNIT(mnum) != NULL) - return (0); - - setno = MD_MIN2SET(mnum); - - /* - * If snarfing the metatrans device, - * then remake the device number - */ - if (snarfing) { - un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno), - un->un_m_key, MD_NOTRUST_DEVT); - } - - /* - * db rec is partially deleted; finish the db delete later - */ - if (MD_STATUS(un) & MD_UN_BEING_RESET) { - mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); - return (1); - } - - /* - * With the current device id implementation there is possibility - * that we may have NODEV if the underlying can't be resolved at - * snarf time. If this is the case we want to be consistent with - * the normal behavior and continue to allow the snarf of unit - * and resolve the devt at the open time - */ - if ((md_getmajor(un->un_m_dev) == md_major) && - (md_dev_exists(un->un_m_dev) == 0)) { - return (1); - } - - /* - * retain the detach status; reset open status - */ - un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED); - un->un_flags |= TRANS_NEED_OPEN; - if ((un->un_flags & TRANS_DETACHED) == 0) - un->un_flags |= TRANS_ATTACHING; - - /* - * log device not set up yet; try again later - */ - if ((un->un_flags & TRANS_DETACHED) == 0) - if (ldl_findlog(un->un_l_recid) == NULL) - return (1); - - /* - * initialize incore fields - */ - un->un_next = NULL; - un->un_l_unit = NULL; - un->un_deltamap = NULL; - un->un_udmap = NULL; - un->un_logmap = NULL; - un->un_matamap = NULL; - un->un_shadowmap = NULL; - un->un_ut = NULL; - un->un_logreset = 0; - un->un_dev = md_makedevice(md_major, mnum); - MD_STATUS(un) = 0; - - /* necessary because capability didn't exist pre-4.1 */ - MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT); - - /* - * attach the log - */ - trans_attach(un, 0); - - /* - * check for master dev dynconcat - */ - if (md_getmajor(un->un_m_dev) == md_major) { - struct mdc_unit *c; - - c = MD_UNIT(md_getminor(un->un_m_dev)); - un->c.un_total_blocks = c->un_total_blocks; - } - - /* place various information in the in-core data structures */ - md_nblocks_set(mnum, un->c.un_total_blocks); - MD_UNIT(mnum) = un; - - return (0); -} - -int -trans_detach(mt_unit_t *un, int force) -{ - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - int error = 0; - - /* - * The caller is responsible for single-threading this routine. - */ - - if (ui == NULL) - return (0); - - /* - * already detached or the log isn't attached yet; do nothing - */ - if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) - return (0); - - /* - * set state to detaching - */ - if (force || !md_unit_isopen(ui)) { - un->un_flags |= TRANS_DETACHING; - if (!MD_UPGRADE) { - trans_commit(un, 0); - } - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE, - MD_UN2SET(un), MD_SID(un)); - } - - /* - * device is busy - */ - if (md_unit_isopen(ui)) - return (EBUSY); - - /* - * detach the log - * if successful - * flags committed to TRANS_DETACHED in database - * un->un_l_unit set to NULL - * no error returned - */ - error = ldl_reset(un, 1, force); - if (error) - return (error); - - /* - * commit to database - */ - if (!MD_UPGRADE) { - trans_commit(un, 0); - } - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - - return (0); -} - -void -trans_attach(mt_unit_t *un, int attaching) -{ - mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); - ml_unit_t *ul; - - /* - * called from snarf, set, and attach. Hence, the attaching param - * The caller is responsible for single-threading this routine. - */ - - /* - * not attaching; do nothing - */ - if ((un->un_flags & TRANS_ATTACHING) == 0) - return; - - /* - * find log unit struct - */ - ul = ldl_findlog(un->un_l_recid); - if (ul == NULL) - return; - un->un_l_dev = ul->un_dev; - - /* - * device is busy; do nothing - */ - if (attaching && md_unit_isopen(ui)) - return; - /* - * other functions use non-NULL un_l_unit as detach/attach flag - */ - un->un_l_unit = ul; - - /* - * add metatrans device to the log's list of mt devices - */ - ldl_utadd(un); - - /* - * attached - */ - un->un_flags &= ~TRANS_ATTACHING; - -} - -int -trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force) -{ - sv_dev_t sv; - mddb_recid_t vtoc_id; - int error = 0; - - /* - * reset log, maps, and ufs interface - */ - error = ldl_reset(un, removing, force); - if (error) - return (error); - - /* - * done with underyling devices - */ - trans_close_all_devs(un); - - md_destroy_unit_incore(mnum, &trans_md_ops); - - md_nblocks_set(mnum, -1ULL); - MD_UNIT(mnum) = NULL; - - if (!removing) - return (0); - - md_reset_parent(un->un_m_dev); - MD_STATUS(un) |= MD_UN_BEING_RESET; - trans_commit(un, 1); - SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un), - MD_SID(un)); - - /* Save the mstr key */ - sv.setno = MD_MIN2SET(mnum); - sv.key = un->un_m_key; - - vtoc_id = un->c.un_vtoc_id; - - mddb_deleterec_wrapper(un->c.un_record_id); - - /* Remove the vtoc, if present */ - if (vtoc_id) - mddb_deleterec_wrapper(vtoc_id); - md_rem_names(&sv, 1); - return (0); -} - -static void -trans_wait_panic(struct buf *cb) -{ - while ((cb->b_flags & B_DONE) == 0) { - md_daemon(1, &md_done_daemon); - drv_usecwait(10); - } -} - -static void -trans_error(md_tps_t *ps) -{ - md_dev64_t md_dev; - md_dev64_t m_dev; - char *str; - struct buf *pb; - mdi_unit_t *ui; - - pb = ps->ps_bp; - ui = ps->ps_ui; - - /* - * gather up params for cmn_err - */ - if (pb->b_flags & B_READ) - str = "read"; - else - str = "write"; - md_dev = md_expldev(pb->b_edev); - m_dev = ps->ps_un->un_m_dev; - - /* - * free up the resources for this request and done the errored buf - */ - md_kstat_done(ui, pb, 0); - kmem_cache_free(trans_parent_cache, ps); - md_unit_readerexit(ui); - md_biodone(pb); - - /* - * print pretty error message - */ - cmn_err(CE_WARN, "md: %s: %s error on %s", - md_shortname(md_getminor(md_dev)), str, - md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0)); -} - -int -trans_done(struct buf *cb) -{ - struct buf *pb; - mdi_unit_t *ui; - md_tps_t *ps; - - ps = (md_tps_t *)cb->b_chain; - pb = ps->ps_bp; - ui = ps->ps_ui; - - if (cb->b_flags & B_ERROR) { - pb->b_flags |= B_ERROR; - pb->b_error = cb->b_error; - /* - * device not in hard error state; report error - */ - if (!ldl_isherror(ps->ps_un->un_l_unit)) { - daemon_request(&md_done_daemon, trans_error, - (daemon_queue_t *)ps, REQ_OLD); - - if (cb->b_flags & B_REMAPPED) - bp_mapout(cb); - if (panicstr) - cb->b_flags |= B_DONE; - else - kmem_cache_free(trans_child_cache, cb); - - return (1); - } - } - - if (cb->b_flags & B_REMAPPED) - bp_mapout(cb); - - if (panicstr) - cb->b_flags |= B_DONE; - else - kmem_cache_free(trans_child_cache, cb); - kmem_cache_free(trans_parent_cache, ps); - md_kstat_done(ui, pb, 0); - md_unit_readerexit(ui); - md_biodone(pb); - - return (0); -} - -static void -md_trans_strategy(buf_t *pb, int flag, void *private) -{ - md_tps_t *ps; - buf_t *cb; /* child buf pointer */ - mt_unit_t *un; - mdi_unit_t *ui; - - ui = MDI_UNIT(getminor(pb->b_edev)); - - md_kstat_waitq_enter(ui); - - un = (mt_unit_t *)md_unit_readerlock(ui); - - if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) { - pb->b_flags |= B_ERROR; - pb->b_error = ENXIO; - pb->b_resid = pb->b_bcount; - md_kstat_waitq_exit(ui); - md_unit_readerexit(ui); - biodone(pb); - return; - } - - ASSERT(!(flag & MD_STR_NOTTOP)); - - /* check and map */ - if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { - md_kstat_waitq_exit(ui); - return; - } - - bp_mapin(pb); - - ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS); - trans_parent_init(ps); - - /* - * Save essential information from the original buffhdr - * in the md_save structure. - */ - ps->ps_un = un; - ps->ps_ui = ui; - ps->ps_bp = pb; - - cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); - trans_child_init(cb); - - cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev), - pb->b_blkno, trans_done, cb, KM_NOSLEEP); - - cb->b_chain = (void *)ps; - - /* - * RELEASE DEBUG - * The following calls shadow debug for testing purposes if we are - * writing and if shadowing is turned on. - */ - if ((un->un_s_dev != NODEV64) && - ((pb->b_flags & B_READ) == 0)) - shadow_debug(un, pb, ps, cb, flag, private); - - md_kstat_waitq_to_runq(ui); - - (void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private); - - /* - * panic in progress; process daemon queues - */ - if (panicstr) { - trans_wait_panic(cb); - kmem_cache_free(trans_child_cache, cb); - } -} - -/* ARGSUSED */ -static int -md_trans_read(dev_t dev, struct uio *uio, cred_t *credp) -{ - int error; - - if ((error = md_chk_uio(uio)) != 0) - return (error); - - return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); -} - -/* ARGSUSED */ -static int -md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - int error; - - if ((error = md_chk_uio(aio->aio_uio)) != 0) - return (error); - - return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio)); -} - -/* ARGSUSED */ -static int -md_trans_write(dev_t dev, struct uio *uio, cred_t *credp) -{ - int error; - - if ((error = md_chk_uio(uio)) != 0) - return (error); - - return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); -} - -/* ARGSUSED */ -static int -md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - int error; - - if ((error = md_chk_uio(aio->aio_uio)) != 0) - return (error); - - return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio)); -} - -static void -trans_cleanup(mt_unit_t *un) -{ - sv_dev_t sv; - - MD_STATUS(un) |= MD_UN_LOG_DELETED; - trans_commit(un, 0); - - /* Save the mstr key */ - sv.setno = MD_UN2SET(un); - sv.key = un->un_m_key; - - mddb_deleterec_wrapper(un->c.un_record_id); - - md_rem_names(&sv, 1); -} - -static int -trans_snarf(md_snarfcmd_t cmd, set_t setno) -{ - mt_unit_t *un; - ml_unit_t *ul; - mddb_recid_t recid; - int gotsomething; - mddb_type_t typ1; - int all_trans_gotten; - mddb_de_ic_t *dep; - mddb_rb32_t *rbp; - size_t newreqsize; - static int trans_found = 0; - - - - if (cmd == MD_SNARF_CLEANUP) { - - if (md_get_setstatus(setno) & MD_SET_STALE) - return (0); - - /* - * clean up partially cleared trans devices - */ - typ1 = (mddb_type_t)md_getshared_key(setno, - trans_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { - un = (mt_unit_t *)mddb_getrecaddr(recid); - (void) trans_detach(un, 1); - if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { - trans_cleanup(un); - recid = mddb_makerecid(setno, 0); - } - } - /* - * clean up partially cleared log devices - */ - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { - ul = (ml_unit_t *)mddb_getrecaddr(recid); - ldl_cleanup(ul); - recid = mddb_makerecid(setno, 0); - } - } - - return (0); - } - - /* - * must snarf up the log devices first - */ - gotsomething = 0; - all_trans_gotten = 1; - typ1 = (mddb_type_t)md_getshared_key(setno, - trans_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { - ml_unit_t *big_ul; - ml_unit32_od_t *small_ul; - - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid); - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_TRANS_LOG; - rbp = dep->de_rb; - /* - * As trans records are always old records, - * we have to check if this record already has been converted. - * We don't want to do that work twice. - */ - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - newreqsize = sizeof (ml_unit_t); - big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); - trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul, - SMALL_2_BIG); - kmem_free(small_ul, dep->de_reqsize); - /* - * Update userdata and incore userdata - * incores are at the end of ul - */ - dep->de_rb_userdata_ic = big_ul; - dep->de_rb_userdata = big_ul; - dep->de_icreqsize = newreqsize; - rbp->rb_private |= MD_PRV_CONVD; - ul = big_ul; - } else { - /* already converted, just set the pointer */ - ul = dep->de_rb_userdata; - } - all_trans_gotten = 0; - if (ldl_build_incore(ul, 1) == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - gotsomething = 1; - } - } - - /* - * now snarf up metatrans devices - */ - gotsomething = 0; - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { - mt_unit_t *big_un; - mt_unit32_od_t *small_un; - - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - if ((trans_found == 0) && (!MD_UPGRADE)) { - cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING); - trans_found = 1; - } - - small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid); - - dep = mddb_getrecdep(recid); - dep->de_flags = MDDB_F_TRANS_MASTER; - rbp = dep->de_rb; - /* - * As trans records are always old records, - * we have to check if this record already has been converted. - * We don't want to do that work twice. - */ - if ((rbp->rb_private & MD_PRV_CONVD) == 0) { - newreqsize = sizeof (mt_unit_t); - big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); - trans_master_convert((caddr_t)small_un, (caddr_t)big_un, - SMALL_2_BIG); - kmem_free(small_un, dep->de_reqsize); - /* - * Update userdata and incore userdata - * incores are at the end of ul - */ - dep->de_rb_userdata_ic = big_un; - dep->de_rb_userdata = big_un; - dep->de_icreqsize = newreqsize; - rbp->rb_private |= MD_PRV_CONVD; - un = big_un; - un->c.un_revision &= ~MD_64BIT_META_DEV; - } else { - /* already converted, just set the pointer */ - un = dep->de_rb_userdata; - } - - /* - * Create minor node for snarfed entry. - */ - (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); - - if (MD_UNIT(MD_SID(un)) != NULL) { - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - continue; - } - - all_trans_gotten = 0; - if (trans_build_incore(un, 1) == 0) { - mddb_setrecprivate(recid, MD_PRV_GOTIT); - md_create_unit_incore(MD_SID(un), &trans_md_ops, 0); - gotsomething = 1; - } - } - - if (!all_trans_gotten) - return (gotsomething); - - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) - if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) - mddb_setrecprivate(recid, MD_PRV_PENDDEL); - return (0); -} - -static int -trans_halt(md_haltcmd_t cmd, set_t setno) -{ - unit_t i; - mdi_unit_t *ui; - minor_t mnum; - mt_unit_t *un; - - if (cmd == MD_HALT_CLOSE) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != trans_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) { - return (1); - } - } - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != trans_md_ops.md_selfindex) - continue; - un = (mt_unit_t *)MD_UNIT(mnum); - if ((un->un_flags & TRANS_NEED_OPEN) == 0) { - trans_close_all_devs(un); - } - } - return (0); - } - - if (cmd == MD_HALT_OPEN) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != trans_md_ops.md_selfindex) - continue; - ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum)); - } - return (0); - } - - if (cmd == MD_HALT_CHECK) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != trans_md_ops.md_selfindex) - continue; - if (md_unit_isopen(ui)) { - return (1); - } - } - return (0); - } - if (cmd == MD_HALT_DOIT) { - for (i = 0; i < md_nunits; i++) { - mnum = MD_MKMIN(setno, i); - if ((ui = MDI_UNIT(mnum)) == NULL) - continue; - if (ui->ui_opsindex != trans_md_ops.md_selfindex) - continue; - (void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum, - 0, 1); - } - return (0); - } - if (cmd == MD_HALT_UNLOAD) - return (0); - - return (1); -} - -/*ARGSUSED3*/ -static int -trans_open( - dev_t *dev, - int flag, - int otyp, - cred_t *cred_p, - int md_oflags -) -{ - minor_t mnum = getminor(*dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - mt_unit_t *un; - int err; - - /* disallow layered opens (e.g., PrestoServe) */ - if (otyp == OTYP_LYR) - return (EINVAL); - - /* single thread */ - un = (mt_unit_t *)md_unit_openclose_enter(ui); - - /* if already open, count open, return success */ - if (md_unit_isopen(ui)) { - err = md_unit_incopen(mnum, flag, otyp); - md_unit_openclose_exit(ui); - if (err != 0) - return (err); - return (0); - } - - /* - * For some reason, not all of the metatrans devices attached to - * this log were openable at snarf; try again now. All of the - * underlying devices have to be openable for the roll thread to work. - */ - if (un->un_flags & TRANS_NEED_OPEN) { - md_unit_openclose_exit(ui); - ldl_open_underlying(un); - if (un->un_flags & TRANS_NEED_OPEN) - return (EINVAL); - un = (mt_unit_t *)md_unit_openclose_enter(ui); - } - - /* count open */ - err = md_unit_incopen(mnum, flag, otyp); - md_unit_openclose_exit(ui); - if (err != 0) - return (err); - - /* return success */ - return (0); -} - -/*ARGSUSED1*/ -static int -trans_close( - dev_t dev, - int flag, - int otyp, - cred_t *cred_p, - int md_oflags -) -{ - minor_t mnum = getminor(dev); - mdi_unit_t *ui = MDI_UNIT(mnum); - mt_unit_t *un; - int err = 0; - - /* single thread */ - un = (mt_unit_t *)md_unit_openclose_enter(ui); - - /* count closed */ - if ((err = md_unit_decopen(mnum, otyp)) != 0) { - md_unit_openclose_exit(ui); - return (err); - } - - /* if still open */ - if (md_unit_isopen(ui)) { - md_unit_openclose_exit(ui); - return (0); - } - md_unit_openclose_exit(ui); - - if (un->un_flags & TRANS_DETACHING) { - /* - * prevent new opens and try to detach the log - */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - (void) trans_detach(un, 0); - rw_exit(&md_unit_array_rw.lock); - } - if (un->un_flags & TRANS_ATTACHING) { - /* - * prevent new opens and try to attach the log - */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - trans_attach(un, 1); - rw_exit(&md_unit_array_rw.lock); - } - - return (0); -} - -static int -trans_imp_set( - set_t setno -) -{ - mt_unit32_od_t *un32; - ml_unit32_od_t *ul32; - mddb_recid_t recid; - int gotsomething = 0; - mddb_type_t typ1; - minor_t *self_id; /* minor needs to be updated */ - mddb_recid_t *record_id; /* record id needs to be updated */ - - /* - * Do log first if there is any - * Note that trans record is always 32 bit - */ - typ1 = (mddb_type_t)md_getshared_key(setno, - trans_md_ops.md_driver.md_drivername); - recid = mddb_makerecid(setno, 0); - - while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid); - - /* - * Trans log record always is old format - * Go ahead update the record with the new set info - */ - record_id = &(ul32->un_recid); - - /* - * Mark the record and update it - */ - *record_id = MAKERECID(setno, DBID(*record_id)); - if (!md_update_minor(setno, mddb_getsidenum - (setno), ul32->un_key)) - goto out; - mddb_setrecprivate(recid, MD_PRV_GOTIT); - } - - - /* - * Now do the master - */ - recid = mddb_makerecid(setno, 0); - while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { - if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) - continue; - - un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid); - - /* - * Trans master record always is old format - */ - self_id = &(un32->c.un_self_id); - record_id = &(un32->c.un_record_id); - - /* - * Mark the record and update it - */ - *record_id = MAKERECID(setno, DBID(*record_id)); - *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); - if (!md_update_minor(setno, mddb_getsidenum - (setno), un32->un_m_key)) - goto out; - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - gotsomething = 1; - } - -out: - return (gotsomething); -} - -static md_named_services_t trans_named_services[] = { - {(intptr_t (*)()) trans_rename_listkids, MDRNM_LIST_URKIDS }, - {(intptr_t (*)()) trans_rename_check, MDRNM_CHECK }, - {(intptr_t (*)()) trans_renexch_update_kids, MDRNM_UPDATE_KIDS }, - {(intptr_t (*)()) trans_rename_update_self, MDRNM_UPDATE_SELF }, - {(intptr_t (*)()) trans_exchange_self_update_from_down, - MDRNM_SELF_UPDATE_FROM_DOWN }, - {(intptr_t (*)()) trans_exchange_parent_update_to, - MDRNM_PARENT_UPDATE_TO }, - {NULL, 0 } -}; - -md_ops_t trans_md_ops = { - trans_open, /* open */ - trans_close, /* close */ - md_trans_strategy, /* strategy */ - NULL, /* print */ - NULL, /* dump */ - md_trans_read, /* read */ - md_trans_write, /* write */ - md_trans_ioctl, /* trans ioctl */ - trans_snarf, /* trans_snarf */ - trans_halt, /* halt */ - md_trans_aread, /* aread */ - md_trans_awrite, /* awrite */ - trans_imp_set, /* import set */ - trans_named_services -}; - -static void -init_init(void) -{ - _init_ldl(); - ASSERT(_init_debug()); - trans_parent_cache = kmem_cache_create("md_trans_parent", - sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0, - trans_child_constructor, trans_child_destructor, - NULL, NULL, NULL, 0); -} - -static void -fini_uninit(void) -{ - ASSERT(_fini_debug()); - _fini_ldl(); - kmem_cache_destroy(trans_parent_cache); - kmem_cache_destroy(trans_child_cache); - trans_parent_cache = trans_child_cache = NULL; -} - -/* define the module linkage */ -MD_PLUGIN_MISC_MODULE("trans module", init_init(), fini_uninit()) diff --git a/usr/src/uts/common/io/lvm/trans/trans_ioctl.c b/usr/src/uts/common/io/lvm/trans/trans_ioctl.c deleted file mode 100644 index 6b0fd8e22cf9..000000000000 --- a/usr/src/uts/common/io/lvm/trans/trans_ioctl.c +++ /dev/null @@ -1,2756 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright 2012 Milan Jurik. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern int md_status; -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; -extern md_ops_t trans_md_ops; -extern md_krwlock_t md_unit_array_rw; -extern uint_t mt_debug; - -extern major_t md_major; - -static mt_unit_t * -trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock) -{ - mt_unit_t *un; - mdi_unit_t *ui; - set_t setno = MD_MIN2SET(mnum); - - if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { - (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); - return (NULL); - } - - if (! (flags & STALE_OK)) { - if (md_get_setstatus(setno) & MD_SET_STALE) { - (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); - return (NULL); - } - } - - ui = MDI_UNIT(mnum); - if (flags & NO_OLD) { - if (ui != NULL) { - (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum); - return (NULL); - } - return ((mt_unit_t *)1); - } - - if (ui == NULL) { - (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); - return (NULL); - } - - if (flags & ARRAY_WRITER) - md_array_writer(lock); - else if (flags & ARRAY_READER) - md_array_reader(lock); - - if (!(flags & NO_LOCK)) { - if (flags & WR_LOCK) - (void) md_ioctl_writerlock(lock, ui); - else /* RD_LOCK */ - (void) md_ioctl_readerlock(lock, ui); - } - un = (mt_unit_t *)MD_UNIT(mnum); - - if (un->c.un_type != MD_METATRANS) { - (void) mdmderror(mde, MDE_NOT_MT, mnum); - return (NULL); - } - - return (un); -} - -#ifdef DEBUG -/* - * DEBUG ROUTINES - * THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED - */ - -extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*); - -/* - * return the global stats struct - */ -static int -trans_get_transstats(void *d, int mode) -{ - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - if (migp->size == 0) { - migp->size = sizeof (struct transstats); - return (0); - } - - if (migp->size < sizeof (struct transstats)) - return (EFAULT); - - if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp, - sizeof (struct transstats), mode)) - return (EFAULT); - return (0); -} - -/* - * test ioctls - */ -/* - * TEST TRYGETBLK - */ -/*ARGSUSED1*/ -static int -trans_test_trygetblk(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - int test; - dev_t dev; - struct buf *bp; - struct buf *trygetblk(); - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - migp->size = 0; - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL) - return (EINVAL); - - dev = un->un_m_dev; - - /* - * test 1 -- don't find nonexistant buf - */ - test = 1; - if (bp = trygetblk(dev, 0)) - goto errout; - - /* - * test 2 - don't find stale buf - */ - test = 2; - if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) - goto errout; - bp->b_flags |= (B_STALE|B_DONE); - brelse(bp); - if (bp = trygetblk(dev, 0)) - goto errout; - - /* - * test 3 -- don't find busy buf - */ - test = 3; - if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) - goto errout; - if (trygetblk(dev, 0)) - goto errout; - bp->b_flags |= B_STALE; - brelse(bp); - - /* - * test 4 -- don't find not-done buf - */ - test = 4; - if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) - goto errout; - brelse(bp); - if (bp = trygetblk(dev, 0)) - goto errout; - - /* - * test 5 -- find an idle buf - */ - test = 5; - if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL) - goto errout; - brelse(bp); - if ((bp = trygetblk(dev, 0)) == NULL) - goto errout; - bp->b_flags |= B_STALE; - brelse(bp); - bp = 0; - - test = 0; /* no test failed */ -errout: - if (bp) { - bp->b_flags |= B_STALE; - brelse(bp); - } - migp->size = test; - if (test) - return (EINVAL); - return (0); -} -/* - * TEST TRYGETPAGE - */ -static page_t * -trans_trypage(struct vnode *vp, uint_t off) -{ - page_t *pp; - - /* - * get a locked page - */ - if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL) - return (NULL); - /* - * get the iolock - */ - if (!page_io_trylock(pp)) { - page_unlock(pp); - return (NULL); - } - return (pp); -} - -/*ARGSUSED1*/ -static int -trans_test_trypage(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - int test; - dev_t dev; - struct page *pp; - struct vnode *devvp; - struct vnode *cvp; - extern struct vnode *common_specvp(struct vnode *); - extern void pvn_io_done(struct page *); - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - migp->size = 0; - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL) - return (EINVAL); - - dev = un->un_m_dev; - devvp = makespecvp(dev, VBLK); - cvp = common_specvp(devvp); - - /* - * get rid of the devices pages - */ - (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL); - - /* - * test 1 -- don't find nonexistant page - */ - test = 1; - if (pp = trans_trypage(cvp, 0)) - goto errout; - - /* - * test 2 -- don't find busy page - */ - test = 2; - if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL) - goto errout; - if (trans_trypage(cvp, 0)) - goto errout; - pvn_io_done(pp); - pp = 0; - - /* - * test 3 - find an idle page - */ - test = 3; - if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL) - goto errout; - pvn_io_done(pp); - if ((pp = trans_trypage(cvp, 0)) == NULL) - goto errout; - pvn_io_done(pp); - pp = 0; - - test = 0; /* no test failed */ -errout: - if (pp) - pvn_io_done(pp); - /* - * get rid of the file's pages - */ - (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL); - VN_RELE(devvp); - - migp->size = test; - if (test) - return (EINVAL); - return (0); -} -/* - * TEST TSD - */ -#define NKEYS (7) -#define NTSDTHREADS (3) -struct tothread { - int test; - int error; - int exits; - int step; - kmutex_t lock; - kcondvar_t cv; -}; -static uint_t keys[NKEYS]; -static struct tothread tta[NTSDTHREADS]; -static int allocatorvalue; -static int okdestructoralloc; - -static void -trans_test_stepwait(struct tothread *tp, int step) -{ - /* - * wait for other thread - */ - mutex_enter(&tp->lock); - while (tp->step < step) - cv_wait(&tp->cv, &tp->lock); - mutex_exit(&tp->lock); -} - -static void -trans_test_step(struct tothread *tp, int step) -{ - /* - * wakeup other threads - */ - mutex_enter(&tp->lock); - tp->step = step; - cv_broadcast(&tp->cv); - mutex_exit(&tp->lock); -} - -static void -trans_test_destructor(void *voidp) -{ - int exits; - struct tothread *tp = voidp; - - /* - * check that threads clean up *all* TSD at exit - */ - mutex_enter(&tp->lock); - exits = ++tp->exits; - mutex_exit(&tp->lock); - if (exits >= NKEYS) - trans_test_step(tp, 3); -} - -static void -trans_test_destructor_alloc(void *voidp) -{ - int *value = voidp; - - okdestructoralloc = 0; - if (value) { - if (*value == allocatorvalue) - okdestructoralloc = 1; - md_trans_free((caddr_t)value, sizeof (value)); - } -} - -static void * -trans_test_allocator(void) -{ - int *value; - - value = (int *)md_trans_zalloc(sizeof (value)); - *value = allocatorvalue; - return ((void *)value); -} - -/* - * thread used to test TSD destroy functionality - */ -static void -trans_test_thread(struct tothread *tp) -{ - int i; - callb_cpr_t cprinfo; - - /* - * Register cpr callback - */ - CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr, - "trans_test_thread"); - - /* - * get some TSD - */ - for (i = NKEYS - 1; i >= 0; --i) - if (tsd_set(keys[i], tp)) { - tp->error = 500; - goto errout; - } - /* - * tell parent that we have TSD - */ - trans_test_step(tp, 1); - - /* - * wait for parent to destroy some of our TSD - */ - trans_test_stepwait(tp, 2); - - /* - * make sure that the appropriate TSD was destroyed - */ - if ((tsd_get(keys[0]) != NULL) || - (tsd_get(keys[NKEYS-1]) != NULL) || - (tsd_get(keys[NKEYS>>1]) != NULL)) { - tp->error = 510; - goto errout; - } - for (i = 0; i < NKEYS; ++i) - if (tsd_get(keys[i]) != tp) - if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) { - tp->error = 520; - goto errout; - } - - /* - * set up cpr exit - */ - mutex_enter(&tp->lock); - CALLB_CPR_EXIT(&cprinfo); - thread_exit(); -errout: - /* - * error -- make sure the parent will wake up (error code in tp) - */ - trans_test_step(tp, 3); - - /* - * set up cpr exit - */ - mutex_enter(&tp->lock); - CALLB_CPR_EXIT(&cprinfo); - thread_exit(); -} - -static void -trans_test_threadcreate(struct tothread *tp) -{ - /* - * initialize the per thread struct and make a thread - */ - bzero((caddr_t)tp, sizeof (struct tothread)); - - mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&tp->cv, NULL, CV_DEFAULT, NULL); - - (void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0, - TS_RUN, minclsyspri); -} -/* - * driver for TSD tests -- *NOT REENTRANT* - */ -/*ARGSUSED1*/ -static int -trans_test_tsd(void *d, int mode) -{ - int test; - uint_t rekeys[NKEYS]; - int i; - uint_t key; - int error; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - migp->size = 0; - - /* - * destroy old keys, if any - */ - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - /* - * test 1 -- simple create and destroy keys tests - */ - test = 1; - error = 0; - for (i = 0; i < NKEYS; ++i) { - tsd_create(&keys[i], NULL); - - /* get with no set should return NULL */ - if (tsd_get(keys[i]) != NULL) { - error = 100; - goto errout; - } - - /* destroyed key should be 0 */ - key = keys[i]; - tsd_destroy(&keys[i]); - if (keys[i]) { - error = 110; - goto errout; - } - - /* destroy the key twice */ - keys[i] = key; - tsd_destroy(&keys[i]); - - /* destroyed key should be 0 */ - if (keys[i]) { - error = 120; - goto errout; - } - - /* getting a destroyed key should return NULL */ - if (tsd_get(keys[i]) != NULL) { - error = 130; - goto errout; - } - /* recreate the key */ - tsd_create(&keys[i], NULL); - - /* should be the same key as before */ - if (key != keys[i]) { - error = 140; - goto errout; - } - - /* initial value should be NULL */ - if (tsd_get(keys[i]) != NULL) { - error = 150; - goto errout; - } - - /* cleanup */ - tsd_destroy(&keys[i]); - } - - /* - * test 2 -- recreate keys - */ - test = 2; - error = 0; - for (i = 0; i < NKEYS; ++i) - tsd_create(&keys[i], NULL); - for (i = 0; i < NKEYS; ++i) { - /* make sure the keys were created */ - if (keys[i] == 0) { - error = 200; - goto errout; - } - - /* make sure that recreating key doesn't change it */ - rekeys[i] = keys[i]; - tsd_create(&rekeys[i], NULL); - if (rekeys[i] != keys[i]) { - error = 210; - goto errout; - } - } - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - - /* - * test 3 -- check processing for unset and destroyed keys - */ - test = 3; - error = 0; - - /* getting a 0 key returns NULL */ - if (tsd_get(0) != NULL) { - error = 300; - goto errout; - } - - /* setting a 0 key returns error */ - if (tsd_set(0, NULL) != EINVAL) { - error = 310; - goto errout; - } - tsd_create(&key, NULL); - - /* setting a created key returns no error */ - if (tsd_set(key, NULL) == EINVAL) { - error = 320; - goto errout; - } - tsd_destroy(&key); - - /* setting a destroyed key returns error */ - if (tsd_set(key, NULL) != EINVAL) { - error = 330; - goto errout; - } - - /* - * test 4 -- make sure that set and get work - */ - test = 4; - error = 0; - - for (i = 0; i < NKEYS; ++i) { - tsd_create(&keys[i], NULL); - - /* set a value */ - (void) tsd_set(keys[i], &key); - - /* get the value */ - if (tsd_get(keys[i]) != &key) { - error = 400; - goto errout; - } - - /* set the value to NULL */ - (void) tsd_set(keys[i], NULL); - - /* get the NULL */ - if (tsd_get(keys[i]) != NULL) { - error = 410; - goto errout; - } - } - /* cleanup */ - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - - /* - * test 5 -- destroying keys w/multiple threads - */ - test = 5; - error = 0; - - /* create the keys */ - for (i = 0; i < NKEYS; ++i) - tsd_create(&keys[i], trans_test_destructor); - - /* create some threads */ - for (i = 0; i < NTSDTHREADS; ++i) - trans_test_threadcreate(&tta[i]); - - /* wait for the threads to assign TSD */ - for (i = 0; i < NTSDTHREADS; ++i) - trans_test_stepwait(&tta[i], 1); - - /* destroy some of the keys */ - tsd_destroy(&keys[0]); - tsd_destroy(&keys[NKEYS - 1]); - tsd_destroy(&keys[NKEYS >> 1]); - tsd_destroy(&keys[NKEYS >> 1]); - - /* wakeup the threads -- they check that the destroy took */ - for (i = 0; i < NTSDTHREADS; ++i) - trans_test_step(&tta[i], 2); - - /* wait for the threads to exit (also checks for TSD cleanup) */ - for (i = 0; i < NTSDTHREADS; ++i) - trans_test_stepwait(&tta[i], 3); - - /* destroy the rest of the keys */ - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - - /* check for error */ - for (i = 0; i < NTSDTHREADS; ++i) { - if (!error) - error = tta[i].error; - mutex_destroy(&tta[i].lock); - cv_destroy(&tta[i].cv); - } - - /* - * test 6 -- test getcreate - */ - test = 6; - error = 0; - - /* make sure the keys are destroyed */ - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - - /* get w/create */ - for (i = 0; i < NKEYS; ++i) { - allocatorvalue = i; - if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc, - trans_test_allocator) != allocatorvalue) { - error = 600; - goto errout; - } - } - for (i = 0; i < NKEYS; ++i) { - allocatorvalue = i; - if (*(int *)tsd_get(keys[i]) != allocatorvalue) { - error = 610; - goto errout; - } - } - /* make sure destructor gets called when we destroy the keys */ - for (i = 0; i < NKEYS; ++i) { - allocatorvalue = i; - okdestructoralloc = 0; - tsd_destroy(&keys[i]); - if (okdestructoralloc == 0) { - error = 620; - goto errout; - } - } - -errout: - /* make sure the keys are destroyed */ - for (i = 0; i < NKEYS; ++i) - tsd_destroy(&keys[i]); - - /* return test # and error code (if any) */ - migp->size = test; - return (error); -} - -/* - * Error Injection Structures, Data, and Functions: - * - * Error injection is used to test the Harpy error recovery system. The - * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a - * unit, and MD_IOC_STOPERRORS turns it off. An mt_error structure is - * associated with every trans device for which we are injecting errors. When - * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to - * trans_error_injector(), so that it gets called for every MDD I/O operation. - * - * The trans unit can be in one of three states: - * - * count down - Each I/O causes er_count_down to be decremented. - * When er_count_down reaches 0, an error is injected, - * the block number is remembered. Without makeing - * special provisions, the log area would receive a - * small percentage of the injected errors. Thus, - * trans_check_error() will be written, so that every - * other error is injected on the log. - * - * suspend - No errors are generated and the counters are not - * modified. This is so that fsck/mkfs can do their thing - * (we're not testing them) and so that the test script can - * set up another test. The transition back to the count - * down state occurs when MD_IOC_INJECTERRORS is invoked - * again. - */ - -typedef enum { - mte_count_down, - mte_suspend, - mte_watch_block -} mte_state; - -typedef struct mt_error { - struct mt_error *er_next; /* next error unit in list. */ - mte_state er_state; - mt_unit_t *er_unitp; /* unit to force errors on. */ - size_t er_count_down; /* i/o transactions until error. */ - size_t er_increment; /* increment for reset_count. */ - size_t er_reset_count; /* used to reset er_count_down */ - size_t er_total_errors; /* count generated errors. */ - /* Following fields describe error we are injecting. */ - dev_t er_bad_unit; /* Unit associated with block in */ - /* error. */ - off_t er_bad_block; /* Block in error. */ -} mt_error_t; - -#define ERROR_INCREMENT (1) -#define INITIAL_COUNT (1) - -static int default_increment = ERROR_INCREMENT; -static kmutex_t error_mutex; /* protects error_list */ -static mt_error_t error_list_head; -static int initial_count = INITIAL_COUNT; -static int (*tstpnt_save)(buf_t *, int, void*) = NULL; - -static mt_error_t * -find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp) -{ - mt_error_t *errp = (mt_error_t *)NULL; - - ASSERT(mutex_owned(&error_mutex) != 0); - *pred_errp = &error_list_head; - while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) { - if (errp->er_unitp == un) - break; - *pred_errp = errp; - } - return (errp); -} - -static mt_error_t * -find_by_dev(md_dev64_t dev) -{ - mt_error_t *errp = &error_list_head; - - ASSERT(mutex_owned(&error_mutex) != 0); - while ((errp = errp->er_next) != (mt_error_t *)NULL) { - if ((errp->er_unitp->un_m_dev == dev) || - (errp->er_unitp->un_l_dev == dev)) - break; - } - return (errp); -} - -static int -trans_check_error(buf_t *bp, mt_error_t *errp) -{ - int rv = 0; - md_dev64_t target = md_expldev(bp->b_edev); - - ASSERT(mutex_owned(&error_mutex) != 0); - switch (errp->er_state) { - case mte_count_down: - errp->er_count_down--; - if (errp->er_count_down == 0) { - /* - * Every other error that we inject should be on - * the log device. Errors will be injected on the - * log device when errp->er_total_errors is even - * and on the master device when it is odd. If - * this I/O is not for the appropriate device, we - * will set errp->er_count_down to 1, so that we - * can try again later. - */ - if ((((errp->er_total_errors % 2) == 0) && - (errp->er_unitp->un_l_dev == target)) || - (((errp->er_total_errors % 2) != 0) && - (errp->er_unitp->un_m_dev == target))) { - /* simulate an error */ - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - /* remember the error. */ - errp->er_total_errors++; - errp->er_bad_unit = bp->b_edev; - errp->er_bad_block = bp->b_blkno; - /* reset counters. */ - errp->er_count_down = errp->er_reset_count; - errp->er_reset_count += errp->er_increment; - rv = 1; - } else { - /* Try again next time. */ - errp->er_count_down = 1; - } - } - break; - - case mte_suspend: - /* No errors while suspended. */ - break; - - case mte_watch_block: - if ((bp->b_edev == errp->er_bad_unit) && - (bp->b_blkno == errp->er_bad_block)) { - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - rv = 1; - } - break; - } - return (rv); -} - -static int -trans_error_injector(buf_t *bp, int flag, void* private) -{ - mt_error_t *errp = (mt_error_t *)NULL; - int (*tstpnt)(buf_t *, int, void*) = NULL; - int rv = 0; - md_dev64_t target = md_expldev(bp->b_edev); - int trv = 0; - mt_unit_t *un; - - mutex_enter(&error_mutex); - errp = find_by_dev(target); - if (errp != (mt_error_t *)NULL) { - un = errp->er_unitp; - if (target == un->un_m_dev) { - /* Target is our master device. */ - rv = trans_check_error(bp, errp); - } - if (target == un->un_l_dev) { - /* - * Target is our log device. Unfortunately, the same - * device may also be used for the MDD database. - * Therefore, we need to make sure that the I/O is for - * the range of blocks designated as our log. - */ - if ((bp->b_blkno >= un->un_l_pwsblk) && - ((bp->b_blkno + btodb(bp->b_bcount)) <= - (un->un_l_sblk + un->un_l_tblks))) { - rv = trans_check_error(bp, errp); - } - } - } - tstpnt = tstpnt_save; - mutex_exit(&error_mutex); - - if (tstpnt != NULL) - trv = (*tstpnt)(bp, flag, private); - - /* - * If we are producing an error (rv != 0) we need to make sure that - * biodone gets called. If the tstpnt returned non-zero, - * we'll assume that it called biodone. - */ - if ((rv != 0) && (trv == 0)) { - md_biodone(bp); - } - rv = ((rv == 0) && (trv == 0)) ? 0 : 1; - return (rv); -} - -/* - * Prepare to inject errors on the master and log devices associated with the - * unit specified in migp. The first time that trans_inject_errors() is called - * for a unit, an mt_error_t structure is allocated and initialized for the - * unit. Subsequent calls for the unit will just insure that the unit is in the - * count down state. - * - * If an mt_error structure is allocated and it is the first one to be put in - * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is - * set to trans_error_injector so that it will be called to see if an I/O - * request should be treated as an error. - */ - -/*ARGSUSED1*/ -static int -trans_inject_errors(void *d, int mode, IOLOCK *lock) -{ - mt_error_t *errp; - mt_error_t *do_not_care; - mt_unit_t *un; - int rv = 0; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL) - return (EINVAL); - - /* - * If there is already a an error structure for the unit make sure that - * it is in count down mode. - */ - - mutex_enter(&error_mutex); - errp = find_by_mtunit(un, &do_not_care); - if (errp != (mt_error_t *)NULL) { - errp->er_state = mte_count_down; - } else { - - /* - * Initialize error structure. - */ - - errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t)); - errp->er_state = mte_count_down; - errp->er_unitp = un; - errp->er_count_down = initial_count; - errp->er_increment = default_increment; - errp->er_reset_count = initial_count; - errp->er_total_errors = 0; - errp->er_bad_unit = 0; - errp->er_bad_block = 0; - - /* Insert it into the list. */ - - errp->er_next = error_list_head.er_next; - error_list_head.er_next = errp; - - /* - * Set up md_call_strategy to call our error injector. - */ - - if (mdv_strategy_tstpnt != trans_error_injector) { - tstpnt_save = mdv_strategy_tstpnt; - mdv_strategy_tstpnt = trans_error_injector; - } - } - mutex_exit(&error_mutex); - return (rv); -} - -/*ARGSUSED1*/ -static int -trans_stop_errors(void *d, int mode, IOLOCK *lock) -{ - mt_error_t *errp = (mt_error_t *)NULL; - mt_error_t *pred_errp; - mt_unit_t *un; - int rv = 0; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL) - return (EINVAL); - - mutex_enter(&error_mutex); - errp = find_by_mtunit(un, &pred_errp); - if (errp != (mt_error_t *)NULL) { - /* Remove from list. */ - pred_errp->er_next = errp->er_next; - if ((error_list_head.er_next == (mt_error_t *)NULL) && - (mdv_strategy_tstpnt == trans_error_injector)) { - mdv_strategy_tstpnt = tstpnt_save; - } - } else { - /* unit not set up for errors. */ - rv = ENXIO; - } - mutex_exit(&error_mutex); - - /* Free memory. */ - - if (errp != (mt_error_t *)NULL) { - md_trans_free((void *)errp, sizeof (*errp)); - } - return (rv); -} - -int -_init_ioctl() -{ - mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL); - return (1); -} - -int -_fini_ioctl() -{ - mutex_destroy(&error_mutex); - return (1); -} - -/* - * END OF DEBUG ROUTINES - */ -#endif /* DEBUG */ -/* - * BEGIN RELEASE DEBUG - * The following routines remain in the released product for testability - */ - -/* - * ufs error injection remains in the released product - */ -/*ARGSUSED1*/ -static int -trans_ufserror(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL || un->un_ut == NULL) - return (EINVAL); - - return (0); -} -/* - * shadow test remains in the released product - */ -static int -trans_set_shadow(void *d, int mode, IOLOCK *lock) -{ - dev32_t device; /* shadow device */ - mt_unit_t *un; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, - WR_LOCK, lock); - if (un == NULL) - return (EINVAL); - - if ((un->un_debug & MT_SHADOW) == 0) - return (EINVAL); - - /* Get shadow device. User always passes down 32 bit devt */ - - if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp, - &device, sizeof (device), mode)) { - return (EFAULT); - } - - /* Save shadow device designator. */ - un->un_s_dev = md_expldev((md_dev64_t)device); - return (0); -} - -/* - * END RELEASE DEBUG - */ - -static int -trans_get(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - ml_unit_t *ul; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, - RD_LOCK, lock); - if (un == NULL) - return (0); - - if (migp->size == 0) { - migp->size = un->c.un_size; - return (0); - } - - if (migp->size < un->c.un_size) - return (EFAULT); - -log: - ul = un->un_l_unit; - if (ul == NULL) - goto master; - - /* - * refresh log fields in case log was metattach'ed - */ - un->un_l_head = (daddr32_t)btodb(ul->un_head_lof); - un->un_l_sblk = un->un_l_head; - un->un_l_pwsblk = ul->un_pwsblk; - un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer); - un->un_l_nblks = ul->un_nblks; - un->un_l_tblks = ul->un_tblks; - un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof); - un->un_l_resv = ul->un_resv; - un->un_l_maxresv = ul->un_maxresv; - un->un_l_error = ul->un_error; - un->un_l_timestamp = ul->un_timestamp; - - /* - * check for log dev dynconcat; can only pick up extra space when the - * tail physically follows the head in the circular log - */ - if (un->un_l_head <= un->un_l_tail) - if (ul->un_status & LDL_METADEVICE) { - struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev)); - - if (c->un_total_blocks > un->un_l_tblks) { - un->un_l_tblks = c->un_total_blocks; - un->un_l_nblks = un->un_l_tblks - un->un_l_sblk; - if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE)) - un->un_l_nblks = btodb(LDL_MAXLOGSIZE); - un->un_l_maxresv = (uint_t)(un->un_l_nblks * - LDL_USABLE_BSIZE); - } - } - -master: - - if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode)) - return (EFAULT); - return (0); -} - -static int -trans_replace(replace_params_t *params) -{ - minor_t mnum = params->mnum; - mt_unit_t *un; - mdi_unit_t *ui; - md_dev64_t cmp_dev; - md_dev64_t ldev; - md_dev64_t mdev; - - mdclrerror(¶ms->mde); - - ui = MDI_UNIT(mnum); - un = md_unit_writerlock(ui); - - if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { - return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum)); - } - - cmp_dev = params->old_dev; - mdev = un->un_m_dev; - ldev = un->un_l_dev; - if (cmp_dev == mdev) { - un->un_m_key = params->new_key; - un->un_m_dev = params->new_dev; - } else if (cmp_dev == ldev) { - un->un_l_key = params->new_key; - un->un_l_dev = params->new_dev; - } - - trans_commit(un, 1); - md_unit_writerexit(ui); - return (0); -} - -/*ARGSUSED1*/ -static int -trans_grow(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - - md_grow_params_t *mgp = d; - - mdclrerror(&mgp->mde); - - un = trans_getun(mgp->mnum, &mgp->mde, - RD_LOCK, lock); - if (un == NULL) - return (0); - - /* - * check for master dev dynconcat - */ - if (md_getmajor(un->un_m_dev) == md_major) { - struct mdc_unit *c; - - c = MD_UNIT(md_getminor(un->un_m_dev)); - if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { - un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS; - } else { - un->c.un_total_blocks = c->un_total_blocks; - } - md_nblocks_set(MD_SID(un), un->c.un_total_blocks); - } - - return (0); -} - -/*ARGSUSED1*/ -static int -trans_detach_ioctl(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - int error; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - /* acquire both md_unit_array_rw, and unit_reader lock */ - un = trans_getun(migp->id, &migp->mde, - READERS, lock); - if (un == NULL) - return (0); - - /* - * simply too much work to make debug modes w/out a log - */ - if (un->un_debug) - return (EACCES); - - /* - * detach the log - */ - error = trans_detach(un, migp->size); - - return (error); -} - -static int -trans_get_log(void *d, int mode, IOLOCK *lock) -{ - mt_unit_t *un; - ml_unit_t *ul; - - md_i_get_t *migp = d; - - mdclrerror(&migp->mde); - - un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); - - if (un == NULL) - return (0); - - ul = un->un_l_unit; - - if (migp->size == 0) { - migp->size = ML_UNIT_ONDSZ; - return (0); - } - - if (migp->size < ML_UNIT_ONDSZ) - return (EFAULT); - - if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ, - mode)) - return (EFAULT); - return (0); -} - -static int -trans_getdevs(void *d, int mode, IOLOCK *lock) -{ - int ndev; - mt_unit_t *un; - md_dev64_t *udevs; - md_dev64_t unit_dev; - - md_getdevs_params_t *mgdp = d; - - mdclrerror(&mgdp->mde); - - un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock); - if (un == NULL) - return (0); - - ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2; - - if (mgdp->cnt == 0) { - mgdp->cnt = ndev; - return (0); - } - - if (mgdp->cnt > 2) - mgdp->cnt = ndev; - - udevs = (md_dev64_t *)(uintptr_t)mgdp->devs; - unit_dev = un->un_m_dev; - - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64) - return (ENODEV); - } - - if (mgdp->cnt >= 1) - if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0], - sizeof (*udevs), mode) != 0) - return (EFAULT); - - unit_dev = un->un_l_dev; - if (md_getmajor(unit_dev) != md_major) { - if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64) - return (ENODEV); - } - - if (mgdp->cnt >= 2) - if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1], - sizeof (*udevs), mode) != 0) - return (EFAULT); - - return (0); -} - -static int -trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock) -{ - minor_t mnum = mirp->mnum; - mt_unit_t *un; - int error; - - mdclrerror(&mirp->mde); - - un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock); - if (un == NULL) - return (0); - - - /* This prevents new opens */ - rw_enter(&md_unit_array_rw.lock, RW_WRITER); - - if (MD_HAS_PARENT(MD_PARENT(un))) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); - } - - if (md_unit_isopen(MDI_UNIT(mnum))) { - rw_exit(&md_unit_array_rw.lock); - return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); - } - /* - * detach the log - */ - error = trans_detach(un, mirp->force); - - /* - * reset (aka remove; aka delete) the trans device - */ - if (error == 0) - error = trans_reset(un, mnum, 1, mirp->force); - - rw_exit(&md_unit_array_rw.lock); - return (error); -} - -static int -trans_get_geom(mt_unit_t *un, struct dk_geom *geomp) -{ - md_get_geom((md_unit_t *)un, geomp); - - return (0); -} - -static int -trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp) -{ - md_get_vtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp) -{ - md_get_extvtoc((md_unit_t *)un, vtocp); - - return (0); -} - -static int -trans_islog(mt_unit_t *un) -{ - if (un->un_l_unit == NULL) - return (ENXIO); - return (0); -} - -static int -trans_set_vtoc( - mt_unit_t *un, - struct vtoc *vtocp -) -{ - return (md_set_vtoc((md_unit_t *)un, vtocp)); -} - -static int -trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp) -{ - return (md_set_extvtoc((md_unit_t *)un, vtocp)); -} - -static int -trans_get_cgapart( - mt_unit_t *un, - struct dk_map *dkmapp -) -{ - md_get_cgapart((md_unit_t *)un, dkmapp); - return (0); -} - -static int -trans_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) -{ - size_t sz = 0; - void *d = NULL; - int err = 0; - - /* We can only handle 32-bit clients for internal commands */ - if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { - return (EINVAL); - } - - switch (cmd) { - - case MD_IOCGET: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_get(d, mode, lockp); - break; - } - - case MD_IOCGET_LOG: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_get_log(d, mode, lockp); - break; - } - - case MD_IOCRESET: - { - md_i_reset_t *p; - - if (! (mode & FWRITE)) - return (EACCES); - - if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_reset_ioctl(p, lockp); - break; - } - - case MD_IOCGROW: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_grow_params_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_grow(d, mode, lockp); - break; - } - - case MD_IOC_TRANS_DETACH: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_detach_ioctl(d, mode, lockp); - break; - } - - case MD_IOCREPLACE: - { - replace_params_t *p; - - if (! (mode & FWRITE)) - return (EACCES); - - if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_replace(p); - break; - } - - - case MD_IOCGET_DEVS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_getdevs_params_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_getdevs(d, mode, lockp); - break; - } - -/* - * debug ioctls - */ -#ifdef DEBUG - - - case MD_IOCGET_TRANSSTATS: - { - if (! (mode & FREAD)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_get_transstats(d, mode); - break; - } - - case MD_IOC_DEBUG: - { - md_i_get_t *mdigp; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - mdigp = d; - - mdclrerror(&mdigp->mde); - mt_debug = mdigp->size; - break; - } - - case MD_IOC_TSD: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_test_tsd(d, mode); - break; - } - - case MD_IOC_TRYGETBLK: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_test_trygetblk(d, mode, lockp); - break; - } - - case MD_IOC_TRYPAGE: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_test_trypage(d, mode, lockp); - break; - } - - - case MD_IOC_INJECTERRORS: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_inject_errors(d, mode, lockp); - break; - } - - case MD_IOC_STOPERRORS: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_stop_errors(d, mode, lockp); - break; - } - - case MD_IOC_ISDEBUG: - break; - -#else /* ! DEBUG */ - - case MD_IOC_ISDEBUG: - case MD_IOCGET_TRANSSTATS: - case MD_IOC_STOPERRORS: - case MD_IOC_TSD: - case MD_IOC_TRYGETBLK: - case MD_IOC_TRYPAGE: - break; - - /* - * error injection behaves like MD_IOC_UFSERROR in released product - */ - case MD_IOC_INJECTERRORS: - { - if (! (mode & FWRITE)) - return (EACCES); - - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_ufserror(d, mode, lockp); - break; - } - - /* - * only the shadow test is allowed in the released product - */ - case MD_IOC_DEBUG: - { - md_i_get_t *mdigp; - - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - mdigp = d; - - mdclrerror(&mdigp->mde); - mt_debug = mdigp->size & MT_SHADOW; - break; - } - -#endif /* ! DEBUG */ - -/* - * BEGIN RELEASE DEBUG - * The following routines remain in the released product for testability - */ - - case MD_IOC_UFSERROR: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_ufserror(d, mode, lockp); - break; - } - - case MD_IOC_SETSHADOW: - { - if (! (mode & FWRITE)) - return (EACCES); - - sz = sizeof (md_i_get_t); - - if ((d = md_trans_zalloc(sz)) == NULL) - return (ENOMEM); - - if (ddi_copyin(data, d, sz, mode)) { - err = EFAULT; - break; - } - - err = trans_set_shadow(d, mode, lockp); - break; - } - -/* - * END RELEASE DEBUG - */ - - - default: - return (ENOTTY); - } - - /* - * copyout and free any args - */ - if (sz != 0) { - if (err == 0) { - if (ddi_copyout(d, data, sz, mode) != 0) { - err = EFAULT; - } - } - md_trans_free(d, sz); - } - return (err); -} - -int -md_trans_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp) -{ - minor_t mnum = getminor(dev); - mt_unit_t *un; - md_error_t mde = mdnullerror; - int err = 0; - - /* handle admin ioctls */ - if (mnum == MD_ADM_MINOR) - return (trans_admin_ioctl(cmd, data, mode, lockp)); - - /* check unit */ - if ((MD_MIN2SET(mnum) >= md_nsets) || - (MD_MIN2UNIT(mnum) >= md_nunits) || - ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL)) - return (ENXIO); - - /* dispatch ioctl */ - switch (cmd) { - - case DKIOCINFO: - { - struct dk_cinfo *p; - - if (! (mode & FREAD)) - return (EACCES); - - if ((p = md_trans_zalloc(sizeof (*p))) == NULL) - return (ENOMEM); - - get_info(p, mnum); - if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) - err = EFAULT; - - md_trans_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGGEOM: - { - struct dk_geom *p; - - if (! (mode & FREAD)) - return (EACCES); - - if ((p = md_trans_zalloc(sizeof (*p))) == NULL) - return (ENOMEM); - - if ((err = trans_get_geom(un, p)) == 0) { - if (ddi_copyout((caddr_t)p, data, sizeof (*p), - mode) != 0) - err = EFAULT; - } - - md_trans_free(p, sizeof (*p)); - return (err); - } - - case DKIOCGVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FREAD)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((err = trans_get_vtoc(un, vtoc)) != 0) { - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - vtoctovtoc32((*vtoc), (*vtoc32)); - if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) - err = EFAULT; - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - case DKIOCSVTOC: - { - struct vtoc *vtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { - err = EFAULT; - } - } -#ifdef _SYSCALL32 - else { - struct vtoc32 *vtoc32; - - vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); - - if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { - err = EFAULT; - } else { - vtoc32tovtoc((*vtoc32), (*vtoc)); - } - kmem_free(vtoc32, sizeof (*vtoc32)); - } -#endif /* _SYSCALL32 */ - - if (err == 0) - err = trans_set_vtoc(un, vtoc); - - kmem_free(vtoc, sizeof (*vtoc)); - return (err); - } - - - case DKIOCGEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FREAD)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if ((err = trans_get_extvtoc(un, extvtoc)) != 0) { - return (err); - } - - if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) - err = EFAULT; - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCSEXTVTOC: - { - struct extvtoc *extvtoc; - - if (! (mode & FWRITE)) - return (EACCES); - - extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); - if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { - err = EFAULT; - } - - if (err == 0) - err = trans_set_extvtoc(un, extvtoc); - - kmem_free(extvtoc, sizeof (*extvtoc)); - return (err); - } - - case DKIOCGAPART: - { - struct dk_map dmp; - - if ((err = trans_get_cgapart(un, &dmp)) != 0) { - return (err); - } - - if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { - if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), - mode) != 0) - err = EFAULT; - } -#ifdef _SYSCALL32 - else { - struct dk_map32 dmp32; - - dmp32.dkl_cylno = dmp.dkl_cylno; - dmp32.dkl_nblk = dmp.dkl_nblk; - - if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), - mode) != 0) - err = EFAULT; - } -#endif /* _SYSCALL32 */ - - return (err); - } - - /* - * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs - * after opening the device. fsck/mkfs use these ioctls for - * error recovery. - */ - case _FIOISLOG: - return (trans_islog(un)); - - default: - return (ENOTTY); - } -} - -/* - * rename named service entry points and support functions - */ - -/* rename/exchange role swap functions */ - -/* - * MDRNM_UPDATE_SELF - * This role swap function is identical for all unit types, - * so keep it here. It's also the best example because it - * touches all the modified portions of the relevant - * in-common structures. - */ -void -trans_rename_update_self( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min; - sv_dev_t sv; - mt_unit_t *un; - - ASSERT(rtxnp); - ASSERT(rtxnp->op == MDRNOP_RENAME); - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta->old_role == MDRR_SELF); - ASSERT(delta->new_role == MDRR_SELF); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - un = (mt_unit_t *)delta->unp; - - /* - * self id changes in our own unit struct - * both mechanisms for identifying the trans must be reset. - */ - - MD_SID(delta->unp) = to_min; - un->un_dev = makedevice(md_major, to_min); - - /* - * clear old array pointers to unit in-core and unit - */ - - MDI_VOIDUNIT(from_min) = NULL; - MD_VOIDUNIT(from_min) = NULL; - - /* - * and point the new slots at the unit in-core and unit structs - */ - - MDI_VOIDUNIT(to_min) = delta->uip; - MD_VOIDUNIT(to_min) = delta->unp; - - /* - * recreate kstats - */ - md_kstat_destroy_ui(delta->uip); - md_kstat_init_ui(to_min, delta->uip); - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = to_min; - - /* - * name space addition of new key was done from user-level - * remove the old name's key here - */ - - sv.setno = MD_MIN2SET(from_min); - sv.key = rtxnp->from.key; - - md_rem_names(&sv, 1); - - - /* - * and store the record id (from the unit struct) into recids - * for later commitment by md_rename() - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * MDRNM_UPDATE_KIDS - * rename/exchange of our child or grandchild - */ -void -trans_renexch_update_kids( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - mt_unit_t *un; - minor_t from_min, to_min, log_min, master_min; - - ASSERT(delta); - ASSERT(rtxnp); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - ASSERT(delta->unp); - ASSERT(rtxnp->recids); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(delta->old_role == MDRR_PARENT); - ASSERT(delta->new_role == MDRR_PARENT); - - un = (mt_unit_t *)delta->unp; - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - log_min = md_getminor(un->un_l_dev); - master_min = md_getminor(un->un_m_dev); - - /* - * since our role isn't changing (parent->parent) - * one of our children must be changing; which one is it? - * find the child being modified, and update - * our notion of it - */ - - /* both devices must be metadevices in order to be updated */ - ASSERT(md_getmajor(un->un_m_dev) == md_major); - ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); - - if ((md_getmajor(un->un_m_dev) == md_major) && - (master_min == from_min)) { - - ASSERT(!(un->un_l_unit && (log_min == from_min))); - - un->un_m_dev = makedevice(md_major, to_min); - un->un_m_key = rtxnp->to.key; - - } else if ((md_getmajor(un->un_m_dev) == md_major) && - un->un_l_unit && (log_min == from_min)) { - - ASSERT(master_min != from_min); - - un->un_l_dev = makedevice(md_major, to_min); - un->un_l_key = rtxnp->to.key; - - } else { - ASSERT(FALSE); - panic("trans_renexch_update_kids: not a metadevice"); - /*NOTREACHED*/ - } - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child] - */ -void -trans_exchange_self_update_from_down( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - mt_unit_t *un; - minor_t from_min, to_min, master_min, log_min; - sv_dev_t sv; - - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(rtxnp); - ASSERT(MDRNOP_EXCHANGE == rtxnp->op); - ASSERT(rtxnp->from.uip); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta->old_role == MDRR_SELF); - ASSERT(delta->new_role == MDRR_CHILD); - ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); - - un = (mt_unit_t *)delta->unp; - - /* - * if we're exchanging a trans, it had better be a metadevice - */ - ASSERT(md_getmajor(un->un_m_dev) == md_major); - - to_min = rtxnp->to.mnum; - from_min = rtxnp->from.mnum; - master_min = md_getminor(un->un_m_dev); - log_min = md_getminor(un->un_l_dev); - - /* - * both mechanisms for identifying a trans must be updated - */ - - MD_SID(delta->unp) = to_min; - un->un_dev = makedevice(md_major, to_min); - - /* - * parent identifier need not change - */ - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: the other half of this transfer is done in the "update to" - * rename/exchange named service. - */ - - MDI_VOIDUNIT(to_min) = delta->uip; - MD_VOIDUNIT(to_min) = delta->unp; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->to.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = to_min; - - /* - * which one of our children is changing? - * - * Note that the check routines forbid changing the log (for now) - * because there's no lockfs-like trans-ufs "freeze and remount" - * or "freeze and bobbit the log." - */ - - /* both devices must be metadevices in order to be updated */ - ASSERT(md_getmajor(un->un_m_dev) == md_major); - ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); - - if ((md_getmajor(un->un_m_dev) == md_major) && - (master_min == to_min)) { - - /* master and log can't both be changed */ - ASSERT(!(un->un_l_unit && (log_min == to_min))); - - un->un_m_dev = makedevice(md_major, from_min); - sv.key = un->un_m_key; - un->un_m_key = rtxnp->from.key; - - } else if ((md_getmajor(un->un_m_dev) == md_major) && - un->un_l_unit && (log_min == to_min)) { - - /* master and log can't both be changed */ - ASSERT(!(master_min == to_min)); - - un->un_l_dev = makedevice(md_major, from_min); - sv.key = un->un_l_key; - un->un_l_key = rtxnp->from.key; - - } else { - ASSERT(FALSE); - panic("trans_exchange_self_update_from_down: not a metadevice"); - /*NOTREACHED*/ - } - - /* - * the new master must exist in the name space - */ - ASSERT(rtxnp->from.key != MD_KEYWILD); - ASSERT(rtxnp->from.key != MD_KEYBAD); - - /* - * delete the key for the changed child from the namespace - */ - - sv.setno = MD_MIN2SET(from_min); - md_rem_names(&sv, 1); - - /* - * and store the record id (from the unit struct) into recids - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self] - */ -void -trans_exchange_parent_update_to( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - mt_unit_t *un; - minor_t from_min, to_min, master_min, log_min; - sv_dev_t sv; - - ASSERT(delta); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT(rtxnp); - ASSERT(MDRNOP_EXCHANGE == rtxnp->op); - ASSERT(rtxnp->from.uip); - ASSERT(rtxnp->rec_idx >= 0); - ASSERT(rtxnp->recids); - ASSERT(delta->old_role == MDRR_PARENT); - ASSERT(delta->new_role == MDRR_SELF); - ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum); - - un = (mt_unit_t *)delta->unp; - - ASSERT(md_getmajor(un->un_m_dev) == md_major); - - to_min = rtxnp->to.mnum; - from_min = rtxnp->from.mnum; - master_min = md_getminor(un->un_m_dev); - log_min = md_getminor(un->un_l_dev); - - /* - * both mechanisms for identifying a trans must be updated - */ - - MD_SID(delta->unp) = from_min; - un->un_dev = makedevice(md_major, from_min); - - /* - * parent identifier need not change - */ - - /* - * point the set array pointers at the "new" unit and unit in-cores - * Note: the other half of this transfer is done in the "update to" - * rename/exchange named service. - */ - - MDI_VOIDUNIT(from_min) = delta->uip; - MD_VOIDUNIT(from_min) = delta->unp; - - /* - * transfer kstats - */ - - delta->uip->ui_kstat = rtxnp->from.kstatp; - - /* - * the unit in-core reference to the get next link's id changes - */ - - delta->uip->ui_link.ln_id = from_min; - - /* - * which one of our children is changing? - */ - - /* both devices must be metadevices in order to be updated */ - ASSERT(md_getmajor(un->un_m_dev) == md_major); - ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); - - if ((md_getmajor(un->un_m_dev) == md_major) && - (master_min == from_min)) { - - /* can't be changing log and master */ - ASSERT(!(un->un_l_unit && (log_min == to_min))); - - un->un_m_dev = makedevice(md_major, to_min); - sv.key = un->un_m_key; - un->un_m_key = rtxnp->to.key; - - } else if (un->un_l_unit && - ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) { - - /* can't be changing log and master */ - ASSERT(master_min != from_min); - - un->un_l_dev = makedevice(md_major, to_min); - sv.key = un->un_l_key; - un->un_l_key = rtxnp->to.key; - - } else { - ASSERT(FALSE); - panic("trans_exchange_parent_update_to: not a metadevice"); - /*NOTREACHED*/ - } - - /* - * delete the key for the changed child from the namespace - */ - - sv.setno = MD_MIN2SET(from_min); - md_rem_names(&sv, 1); - - /* - * and store the record id (from the unit struct) into recids - */ - - md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); -} - -/* - * MDRNM_LIST_URKIDS: named svc entry point - * all all delta entries appropriate for our children onto the - * deltalist pointd to by dlpp - */ -int -trans_rename_listkids( - md_rendelta_t **dlpp, - md_rentxn_t *rtxnp) -{ - minor_t from_min, to_min, master_min, log_min; - mt_unit_t *from_un; - md_rendelta_t *new, *p; - int n_children; - - ASSERT(rtxnp); - ASSERT(dlpp); - ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - n_children = 0; - - if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) { - (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); - return (-1); - } - - for (p = *dlpp; p && p->next != NULL; p = p->next) { - /* NULL */ - } - - if (md_getmajor(from_un->un_m_dev) == md_major) { - - master_min = md_getminor(from_un->un_m_dev); - - p = new = md_build_rendelta(MDRR_CHILD, - to_min == master_min? MDRR_SELF: MDRR_CHILD, - from_un->un_m_dev, p, MD_UNIT(master_min), - MDI_UNIT(master_min), &rtxnp->mde); - - if (!new) { - if (mdisok(&rtxnp->mde)) { - (void) mdsyserror(&rtxnp->mde, ENOMEM); - } - return (-1); - } - ++n_children; - } - - if (from_un->un_l_unit && - (md_getmajor(from_un->un_l_dev) == md_major)) { - - log_min = md_getminor(from_un->un_l_dev); - - new = md_build_rendelta(MDRR_CHILD, - to_min == log_min? MDRR_SELF: MDRR_CHILD, - from_un->un_l_dev, p, MD_UNIT(log_min), - MDI_UNIT(log_min), &rtxnp->mde); - if (!new) { - if (mdisok(&rtxnp->mde)) { - (void) mdsyserror(&rtxnp->mde, ENOMEM); - } - return (-1); - } - ++n_children; - } - - return (n_children); -} - -/* - * support routine for MDRNM_CHECK - */ -static int -trans_may_renexch_self( - mt_unit_t *un, - mdi_unit_t *ui, - md_rentxn_t *rtxnp) -{ - minor_t from_min; - minor_t to_min; - - ASSERT(rtxnp); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - - from_min = rtxnp->from.mnum; - to_min = rtxnp->to.mnum; - - if (!un || !ui) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD); - - if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - if (MD_PARENT(un) == MD_MULTI_PARENT) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); - return (EINVAL); - } - - switch (rtxnp->op) { - case MDRNOP_EXCHANGE: - /* - * may only swap with our child (master) if it is a metadevice - */ - if (md_getmajor(un->un_m_dev) != md_major) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, - to_min); - return (EINVAL); - } - - if (un->un_l_unit && - (md_getmajor(un->un_l_dev) != md_major)) { - - (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, - to_min); - return (EINVAL); - } - - if (md_getminor(un->un_m_dev) != to_min) { - (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, - to_min); - return (EINVAL); - } - - break; - - case MDRNOP_RENAME: - break; - - default: - (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, - from_min); - return (EINVAL); - } - - return (0); /* ok */ -} - -/* - * Named service entry point: MDRNM_CHECK - */ -intptr_t -trans_rename_check( - md_rendelta_t *delta, - md_rentxn_t *rtxnp) -{ - int err = 0; - mt_unit_t *un; - - ASSERT(delta); - ASSERT(rtxnp); - ASSERT(delta->unp); - ASSERT(delta->uip); - ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); - - if (!delta || !rtxnp || !delta->unp || !delta->uip) { - (void) mdsyserror(&rtxnp->mde, EINVAL); - return (EINVAL); - } - - un = (mt_unit_t *)delta->unp; - - if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) { - /* - * trans' may not be open, if it is being modified in the exchange - * or rename; trans-UFS hasn't been verified to handle the change - * out from underneath it. - */ - if ((md_unit_isopen(delta->uip)) && - ((md_getminor(delta->dev) == rtxnp->from.mnum) || - (md_getminor(delta->dev) == rtxnp->to.mnum))) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_BUSY, rtxnp->from.mnum); - return (EBUSY); - } - } - - /* - * can't rename or exchange with a log attached - */ - - if (un->un_l_unit) { - (void) mdmderror(&rtxnp->mde, - MDE_RENAME_BUSY, rtxnp->from.mnum); - return (EBUSY); - } - - switch (delta->old_role) { - case MDRR_SELF: - /* - * self does additional checks - */ - err = trans_may_renexch_self((mt_unit_t *)delta->unp, - delta->uip, rtxnp); - if (err != 0) { - goto out; - } - /* FALLTHROUGH */ - - case MDRR_PARENT: - /* - * top_is_trans is only used to check for online - * rename/exchange when MD_RENAME_VERSION == OFFLINE - * since trans holds the sub-devices open - */ - rtxnp->stat.trans_in_stack = TRUE; - break; - default: - break; - } -out: - return (err); -} - -/* end of rename/exchange */ diff --git a/usr/src/uts/common/io/lvm/trans/trans_log.c b/usr/src/uts/common/io/lvm/trans/trans_log.c deleted file mode 100644 index 96d8a7cb434d..000000000000 --- a/usr/src/uts/common/io/lvm/trans/trans_log.c +++ /dev/null @@ -1,697 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern unit_t md_nunits; -extern set_t md_nsets; -extern md_set_t md_set[]; - -extern md_ops_t trans_md_ops; -extern major_t md_major; - - - - -static kmutex_t ml_lock; -static ml_unit_t *ul_list; /* List of all log units */ -static int md_nlogs; -static kmutex_t ut_mutex; /* per log list of metatrans units */ -static kmutex_t oc_mutex; /* single threads opens/closes */ - -static void md_free_cirbuf(cirbuf_ic_t *cb); - -#define IOWAIT(bp) sema_p(&bp->b_io) -#define IODONE(bp) sema_v(&bp->b_io) - -void -_init_ldl(void) -{ - mutex_init(&ut_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&oc_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&ml_lock, NULL, MUTEX_DRIVER, NULL); -} - -void -_fini_ldl(void) -{ - mutex_destroy(&ut_mutex); - mutex_destroy(&oc_mutex); - mutex_destroy(&ml_lock); -} - -static void -ldl_errorstate(ml_unit_t *ul) -{ - char *str; - - if (ldl_iserror(ul)) - str = "Error"; - else if (ldl_isherror(ul)) - str = "Hard Error"; - else - str = "Okay"; - - cmn_err(CE_WARN, "md: logging device: %s changed state to %s", - md_devname(mddb_getsetnum(ul->un_recid), ul->un_dev, NULL, 0), str); -} - - -/* - * atomically commit the log unit struct and any underlying metadevice struct - */ -static void -logcommitdb(ml_unit_t *ul) -{ - mddb_recid_t recids[4]; - - TRANSSTATS(ts_logcommitdb); - - uniqtime32(&ul->un_timestamp); - - /* - * commit the log device and its child (if metadevice) - */ - recids[0] = ul->un_recid; - if (ul->un_status & LDL_METADEVICE) { - struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev)); - recids[1] = c->un_record_id; - recids[2] = 0; - } else - recids[1] = 0; - - mddb_commitrecs_wrapper(recids); -} - -static void -md_alloc_wrbuf(cirbuf_ic_t *cb, size_t bufsize) -{ - int i; - buf_t *bp; - - /* - * Clear previous allocation - */ - if (cb->cb_nb) - md_free_cirbuf(cb); - - bzero((caddr_t)cb, sizeof (*cb)); - rw_init(&cb->cb_rwlock.lock, NULL, RW_DRIVER, NULL); - - rw_enter(&cb->cb_rwlock.lock, RW_WRITER); - - /* - * preallocate 3 bp's and put them on the free list. - */ - for (i = 0; i < 3; ++i) { - bp = md_trans_zalloc(sizeof (buf_t)); - sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL); - sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); - bp->b_offset = -1; - bp->b_forw = cb->cb_free; - cb->cb_free = bp; - - TRANSSTATS(ts_alloc_bp); - } - - cb->cb_va = md_trans_alloc(bufsize); - cb->cb_nb = bufsize; - - /* - * first bp claims entire write buffer - */ - bp = cb->cb_free; - cb->cb_free = bp->b_forw; - - bp->b_forw = bp; - bp->b_back = bp; - cb->cb_bp = bp; - bp->b_un.b_addr = cb->cb_va; - bp->b_bufsize = cb->cb_nb; - - rw_exit(&cb->cb_rwlock.lock); -} - -static void -md_alloc_rdbuf(cirbuf_ic_t *cb, size_t bufsize, size_t blksize) -{ - caddr_t va; - size_t nb; - buf_t *bp; - - /* - * Clear previous allocation - */ - if (cb->cb_nb) - md_free_cirbuf(cb); - - bzero((caddr_t)cb, sizeof (*cb)); - rw_init(&cb->cb_rwlock.lock, NULL, RW_DRIVER, NULL); - - rw_enter(&cb->cb_rwlock.lock, RW_WRITER); - - cb->cb_va = md_trans_alloc(bufsize); - cb->cb_nb = bufsize; - - /* - * preallocate N bufs that are hard-sized to blksize - * in other words, the read buffer pool is a linked list - * of statically sized bufs. - */ - va = cb->cb_va; - while ((nb = bufsize) != 0) { - if (nb > blksize) - nb = blksize; - bp = md_trans_alloc(sizeof (buf_t)); - bzero((caddr_t)bp, sizeof (buf_t)); - sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL); - sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); - bp->b_un.b_addr = va; - bp->b_bufsize = nb; - bp->b_offset = -1; - if (cb->cb_bp) { - bp->b_forw = cb->cb_bp->b_forw; - bp->b_back = cb->cb_bp; - cb->cb_bp->b_forw->b_back = bp; - cb->cb_bp->b_forw = bp; - } else - bp->b_forw = bp->b_back = bp; - cb->cb_bp = bp; - - TRANSSTATS(ts_alloc_bp); - - bufsize -= nb; - va += nb; - } - - rw_exit(&cb->cb_rwlock.lock); -} - - -static void -md_free_cirbuf(cirbuf_ic_t *cb) -{ - buf_t *bp; - - if (cb->cb_nb == 0) - return; - - rw_enter(&cb->cb_rwlock.lock, RW_WRITER); - ASSERT(cb->cb_dirty == NULL); - - /* - * free the active bufs - */ - while ((bp = cb->cb_bp) != NULL) { - if (bp == bp->b_forw) - cb->cb_bp = NULL; - else - cb->cb_bp = bp->b_forw; - bp->b_back->b_forw = bp->b_forw; - bp->b_forw->b_back = bp->b_back; - sema_destroy(&bp->b_sem); - sema_destroy(&bp->b_io); - md_trans_free(bp, sizeof (buf_t)); - } - - /* - * free the free bufs - */ - while ((bp = cb->cb_free) != NULL) { - cb->cb_free = bp->b_forw; - sema_destroy(&bp->b_sem); - sema_destroy(&bp->b_io); - md_trans_free(bp, sizeof (buf_t)); - } - md_trans_free(cb->cb_va, cb->cb_nb); - cb->cb_va = NULL; - cb->cb_nb = 0; - rw_exit(&cb->cb_rwlock.lock); - rw_destroy(&cb->cb_rwlock.lock); -} - -int -ldl_build_incore(ml_unit_t *ul, int snarfing) -{ - size_t bufsize; - set_t setno; - - setno = mddb_getsetnum(ul->un_recid); - - ASSERT(ul->un_head_lof >= ul->un_bol_lof); - ASSERT(ul->un_bol_lof); - - if (ul->un_status & LDL_BEING_RESET) { - mddb_setrecprivate(ul->un_recid, MD_PRV_PENDCLEAN); - return (1); - } - - /* - * If snarfing the log device, - * then remake the device number - * else (we are creating the log device) - * set the driver name in the shared name space. - */ - if (snarfing) { - ul->un_dev = md_getdevnum(setno, mddb_getsidenum(setno), - ul->un_key, MD_NOTRUST_DEVT); - } - - /* - * With the current device id implementation there is possibility - * that we may have NODEV if the underlying can't be resolved at - * snarf time. If this is the case we want to be consistent with - * the normal behavior and continue to allow log to be put on the list. - * We delay the resolve of the dev_t so we can resolve at the open - * time of the log device by device id - */ - if ((md_getmajor(ul->un_dev) == md_major) && - (md_dev_exists(ul->un_dev) == 0)) { - return (1); - } - - mutex_enter(&ml_lock); - - /* - * initialize incore structs - * LDL_FIND_TAIL flag indicates that all I/O must wait until the - * tail has been found. - */ - ul->un_opencnt = 0; - ul->un_transcnt = 0; - ul->un_resv = 0; - ul->un_utlist = NULL; - ul->un_logmap = NULL; - ul->un_status |= LDL_FIND_TAIL; - ul->un_status &= ~LDL_SCAN_ACTIVE; - ASSERT(ul->un_devbsize == DEV_BSIZE); - - mutex_init(&ul->un_log_mutex, NULL, MUTEX_DRIVER, NULL); - - /* - * allocate some read and write buffers - */ - bufsize = md_ldl_bufsize(ul); - ul->un_rdbuf.cb_nb = 0; - md_alloc_rdbuf(&ul->un_rdbuf, bufsize, MAPBLOCKSIZE); - ul->un_wrbuf.cb_nb = 0; - md_alloc_wrbuf(&ul->un_wrbuf, bufsize); - - if (snarfing) { - if (ul->un_error & LDL_ANYERROR) { - ul->un_error = LDL_HERROR; - ldl_errorstate(ul); - } else - ul->un_error = 0; - } - - /* Put on the unit list */ - ul->un_next = ul_list; - ul_list = ul; - md_nlogs++; - - mutex_exit(&ml_lock); - return (0); -} - -ml_unit_t * -ldl_findlog(mddb_recid_t recid) -{ - ml_unit_t *ul; - - /* - * Find a unit struct by database recid - */ - mutex_enter(&ml_lock); - for (ul = ul_list; ul; ul = ul->un_next) - if (ul->un_recid == recid) - break; - mutex_exit(&ml_lock); - return (ul); -} - -/* - * ldl_utadd adds a metatrans device to the log's list of mt devices. - * WARNING: top_end_sync() scans this list W/O locking for performance!!! - */ -void -ldl_utadd(mt_unit_t *un) -{ - ml_unit_t *ul = un->un_l_unit; - - if (ul == NULL) - return; - - mutex_enter(&ut_mutex); - un->un_next = ul->un_utlist; - ul->un_utlist = un; - ASSERT((ul->un_logmap == NULL) || (ul->un_logmap == un->un_logmap)); - ul->un_logmap = un->un_logmap; - mutex_exit(&ut_mutex); -} - -/* - * ldl_utdel removes a metatrans device to the log's list of mt devices. - * WARNING: top_end_sync() scans this list W/O locking for performance!!! - */ -static void -ldl_utdel(mt_unit_t *un) -{ - ml_unit_t *ul = un->un_l_unit; - mt_unit_t **utp = &ul->un_utlist; - - mutex_enter(&ut_mutex); - for (utp = &ul->un_utlist; - *utp && (*utp != un); - utp = &(*utp)->un_next); - if (*utp) - *utp = un->un_next; - un->un_l_unit = NULL; - mutex_exit(&ut_mutex); -} - -mddb_recid_t -ldl_create(mdkey_t key, mt_unit_t *un) -{ - ml_unit_t *ul; - mddb_recid_t recid; - struct timeval32 tv; - mddb_type_t typ1; - set_t setno; - - setno = MD_UN2SET(un); - - /* - * Find a unit struct for this key and set - * If we found one then, we are done. - * Else create one. - */ - mutex_enter(&ml_lock); - for (ul = ul_list; ul; ul = ul->un_next) - if ((ul->un_key == key) && - (mddb_getsetnum(ul->un_recid) == setno)) - break; - mutex_exit(&ml_lock); - - if (ul) - return (ul->un_recid); - - typ1 = (mddb_type_t)md_getshared_key(setno, - trans_md_ops.md_driver.md_drivername); - recid = mddb_createrec(ML_UNIT_ONDSZ, typ1, LOG_REC, - MD_CRO_32BIT | MD_CRO_TRANS_LOG, setno); - if (recid < 0) - return (recid); - mddb_setrecprivate(recid, MD_PRV_GOTIT); - - ul = (ml_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*ul), 0); - - ul->un_recid = recid; - ul->un_key = key; - ul->un_dev = md_getdevnum(setno, mddb_getsidenum(setno), key, - MD_NOTRUST_DEVT); - ul->un_bol_lof = (off32_t)dbtob(un->un_l_sblk); - ul->un_eol_lof = ul->un_bol_lof + (off32_t)dbtob(un->un_l_nblks); - ul->un_pwsblk = un->un_l_pwsblk; - ul->un_nblks = un->un_l_nblks; - ul->un_tblks = un->un_l_tblks; - ul->un_maxresv = un->un_l_maxresv; - ul->un_maxtransfer = (uint_t)dbtob(un->un_l_maxtransfer); - ul->un_devbsize = DEV_BSIZE; - - /* - * empty log - */ - uniqtime32(&tv); - ul->un_head_lof = ul->un_bol_lof; - ul->un_tail_lof = ul->un_bol_lof; - ul->un_head_ident = tv.tv_sec; - ul->un_tail_ident = tv.tv_sec; - - if (md_getmajor(ul->un_dev) == md_major) - ul->un_status |= LDL_METADEVICE; - - md_set_parent(ul->un_dev, (int)MD_MULTI_PARENT); - (void) ldl_build_incore(ul, 0); - logcommitdb(ul); - return (recid); -} - -int -ldl_open_dev(mt_unit_t *un, ml_unit_t *ul) -{ - int err = 0; - md_dev64_t tmpdev; - minor_t mnum = MD_SID(un); - set_t setno = MD_MIN2SET(MD_SID(un)); - side_t side = mddb_getsidenum(setno); - - mutex_enter(&oc_mutex); - - if (ul->un_opencnt) { - ul->un_opencnt++; - mutex_exit(&oc_mutex); - return (0); - } - - tmpdev = ul->un_dev; - /* - * Do the open by device id if it is regular device - */ - if ((md_getmajor(tmpdev) != md_major) && - md_devid_found(setno, side, ul->un_key) == 1) { - tmpdev = md_resolve_bydevid(mnum, tmpdev, ul->un_key); - } - err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL); - ul->un_dev = tmpdev; - - if (err == 0) - ul->un_opencnt++; - - mutex_exit(&oc_mutex); - return (err); -} - -void -ldl_close_dev(ml_unit_t *ul) -{ - - mutex_enter(&oc_mutex); - - ul->un_opencnt--; - - if (ul->un_opencnt) { - mutex_exit(&oc_mutex); - return; - } - - /* Last reference to the log, close it */ - md_layered_close(ul->un_dev, MD_OFLG_NULL); - - mutex_exit(&oc_mutex); -} - - -/* - * LOGSCAN STUFF - */ -int -ldl_isherror(ml_unit_t *ul) -{ - return ((ul != NULL) && (ul->un_error & LDL_HERROR)); -} - -int -ldl_iserror(ml_unit_t *ul) -{ - return ((ul != NULL) && (ul->un_error & LDL_ERROR)); -} - -size_t -md_ldl_bufsize(ml_unit_t *ul) -{ - size_t bufsize; - - /* - * initial guess is the maxtransfer value for this log device - * reduce by number of logs - * increase for sharing - * increase if too small - * decrease if too large - */ - bufsize = ul->un_maxtransfer; - if (md_nlogs) - bufsize /= md_nlogs; - if (ul->un_transcnt) - bufsize *= ul->un_transcnt; - bufsize = dbtob(btod(bufsize)); - if (bufsize < LDL_MINBUFSIZE) - bufsize = LDL_MINBUFSIZE; - if (bufsize > maxphys) - bufsize = maxphys; - if (bufsize > ul->un_maxtransfer) - bufsize = ul->un_maxtransfer; - return (bufsize); -} - -/* - * if necessary; open all underlying devices for ul and start threads - * called at snarf, metainit, and open - */ -void -ldl_open_underlying(mt_unit_t *un) -{ - ml_unit_t *ul = un->un_l_unit; - int err = 0; - - - /* - * first, handle the case of detached logs - */ - if (ul == NULL) { - err = trans_open_all_devs(un); - if (err == 0) { - un->un_flags &= ~TRANS_NEED_OPEN; - un->un_flags |= TRANS_OPENED; - } - } -} - -/* - * remove log unit struct from global linked list - */ -static void -ldl_unlist(ml_unit_t *ul) -{ - ml_unit_t **ulp; - - /* - * remove from list - */ - mutex_enter(&ml_lock); - for (ulp = &ul_list; *ulp && (*ulp != ul); ulp = &(*ulp)->un_next); - if (*ulp) { - *ulp = ul->un_next; - --md_nlogs; - } - mutex_exit(&ml_lock); -} - -/* - * get rid of a log unit from the database - */ -void -ldl_cleanup(ml_unit_t *ul) -{ - sv_dev_t sv; - - /* Save the log key */ - sv.setno = mddb_getsetnum(ul->un_recid); - sv.key = ul->un_key; - - mddb_deleterec_wrapper(ul->un_recid); - md_rem_names(&sv, 1); -} - -static void -ldl_delete(ml_unit_t *ul, int removing) -{ - - /* - * remove from list - */ - ldl_unlist(ul); - - /* - * free up resources - */ - md_free_cirbuf(&ul->un_rdbuf); - md_free_cirbuf(&ul->un_wrbuf); - - mutex_destroy(&ul->un_log_mutex); - - if (removing) { - md_reset_parent(ul->un_dev); - ul->un_status |= LDL_BEING_RESET; - logcommitdb(ul); - ldl_cleanup(ul); - } -} - -/* - * detach log from trans device - * caller insures that trans device is idle and will remain idle - */ -/* ARGSUSED */ -int -ldl_reset(mt_unit_t *un, int removing, int force) -{ - ml_unit_t *ul = un->un_l_unit; - - if (ul == NULL) - return (0); - - if (un->un_flags & TRANS_DETACHING) { - un->un_flags &= ~TRANS_DETACHING; - un->un_flags |= TRANS_DETACHED; - trans_commit(un, 0); - } - - /* - * remove this metatrans device from the log's list of mt devices - */ - ldl_utdel(un); - - /* - * busy; do nothing - */ - if (ul->un_utlist) - return (0); - - ldl_delete(ul, removing); - - return (0); -} diff --git a/usr/src/uts/common/os/modsysfile.c b/usr/src/uts/common/os/modsysfile.c index a4d66c6563a1..8dca86880fa9 100644 --- a/usr/src/uts/common/os/modsysfile.c +++ b/usr/src/uts/common/os/modsysfile.c @@ -18,9 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ #include @@ -69,7 +71,6 @@ static struct sysparam *sysparam_tl; /* tail of parameters list */ static vmem_t *mod_sysfile_arena; /* parser memory */ char obp_bootpath[BO_MAXOBJNAME]; /* bootpath from obp */ -char svm_bootpath[BO_MAXOBJNAME]; /* bootpath redirected via rootdev */ #if defined(_PSM_MODULES) @@ -1503,11 +1504,6 @@ setparams() bootobjp = &rootfs; switch (sysp->sys_type) { - case MOD_ROOTDEV: - root_is_svm = 1; - (void) copystr(sysp->sys_ptr, svm_bootpath, - BO_MAXOBJNAME, NULL); - break; case MOD_SWAPDEV: bootobjp->bo_flags |= BO_VALID; (void) copystr(sysp->sys_ptr, bootobjp->bo_name, @@ -1519,6 +1515,7 @@ setparams() (void) copystr(sysp->sys_ptr, bootobjp->bo_fstype, BO_MAXOBJNAME, NULL); break; + case MOD_ROOTDEV: default: break; } diff --git a/usr/src/uts/common/os/space.c b/usr/src/uts/common/os/space.c index 6b6bff8b4171..2b9b7030482c 100644 --- a/usr/src/uts/common/os/space.c +++ b/usr/src/uts/common/os/space.c @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ /* @@ -110,7 +111,6 @@ struct vnode kvps[KV_MAX]; */ struct vnode *rootvp; /* vnode of the root device */ dev_t rootdev; /* dev_t of the root device */ -boolean_t root_is_svm; /* root is a mirrored device flag */ boolean_t root_is_ramdisk; /* root is ramdisk */ uint32_t ramdisk_size; /* (KB) currently set only for sparc netboots */ diff --git a/usr/src/uts/common/os/swapgeneric.c b/usr/src/uts/common/os/swapgeneric.c index b573485353a7..77167149fea1 100644 --- a/usr/src/uts/common/os/swapgeneric.c +++ b/usr/src/uts/common/os/swapgeneric.c @@ -18,9 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1982, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. */ /* @@ -135,13 +136,6 @@ rootconf(void) if (error = clboot_rootconf()) return (error); - if (root_is_svm) { - (void) strncpy(rootfs.bo_name, obp_bootpath, BO_MAXOBJNAME); - - BMDPRINTF(("rootconf: svm: rootfs name %s\n", rootfs.bo_name)); - BMDPRINTF(("rootconf: svm: svm name %s\n", svm_bootpath)); - } - /* * Run _init on the root filesystem (we already loaded it * but we've been waiting until now to _init it) which will @@ -173,10 +167,6 @@ rootconf(void) VFS_INIT(rootvfs, &vsw->vsw_vfsops, (caddr_t)0); VFS_HOLD(rootvfs); - if (root_is_svm) { - rootvfs->vfs_flag |= VFS_RDONLY; - } - /* * This pm-releated call has to occur before root is mounted since we * need to power up all devices. It is placed after VFS_INIT() such @@ -237,41 +227,6 @@ rootconf(void) return (error); } -/* - * Remount root on an SVM mirror root device - * Only supported on UFS filesystems at present - */ -int -svm_rootconf(void) -{ - int error; - extern int ufs_remountroot(struct vfs *vfsp); - - ASSERT(root_is_svm == 1); - - if (strcmp(rootfs.bo_fstype, "ufs") != 0) { - cmn_err(CE_CONT, "Mounting root on %s with filesystem " - "type %s is not supported\n", - rootfs.bo_name, rootfs.bo_fstype); - return (EINVAL); - } - - (void) strncpy(rootfs.bo_name, svm_bootpath, BO_MAXOBJNAME); - - BMDPRINTF(("svm_rootconf: rootfs %s\n", rootfs.bo_name)); - - error = ufs_remountroot(rootvfs); - - if (error) { - cmn_err(CE_CONT, "Cannot remount root on %s fstype %s\n", - rootfs.bo_name, rootfs.bo_fstype); - } else { - cmn_err(CE_CONT, "?root remounted on %s fstype %s\n", - rootfs.bo_name, rootfs.bo_fstype); - } - return (error); -} - /* * Under the assumption that our root file system is on a * disk partition, get the dev_t of the partition in question. @@ -350,22 +305,10 @@ loadrootmodules(void) BMDPRINTF(("loadrootmodules: flags 0x%x\n", rootfs.bo_flags)); /* - * zzz We need to honor what's in rootfs if it's not null. - * non-null means use what's there. This way we can - * change rootfs with /etc/system AND with tunetool. + * Get the root fstype and root device path from boot. */ - if (root_is_svm) { - /* user replaced rootdev, record obp_bootpath */ - obp_bootpath[0] = '\0'; - (void) getphysdev("root", obp_bootpath, BO_MAXOBJNAME); - BMDPRINTF(("loadrootmodules: obp_bootpath %s\n", obp_bootpath)); - } else { - /* - * Get the root fstype and root device path from boot. - */ - rootfs.bo_fstype[0] = '\0'; - rootfs.bo_name[0] = '\0'; - } + rootfs.bo_fstype[0] = '\0'; + rootfs.bo_name[0] = '\0'; /* * This lookup will result in modloadonly-ing the root @@ -413,17 +356,13 @@ loadrootmodules(void) */ err = 0; BMDPRINTF(("loadrootmodules: rootfs %s\n", rootfs.bo_name)); - if (root_is_svm == 0) { - BMDPRINTF(("loadrootmodules: rootfs %s\n", rootfs.bo_name)); - name = rootfs.bo_name; - err = load_bootpath_drivers(rootfs.bo_name); - } + name = rootfs.bo_name; + err = load_bootpath_drivers(rootfs.bo_name); /* * Load driver modules in obp_bootpath, this is always * required for mountroot to succeed. obp_bootpath is - * is set if rootdev is set via /etc/system, which is - * the case if booting of a SVM/VxVM mirror. + * is set if rootdev is set via /etc/system. */ if ((err == 0) && obp_bootpath[0] != '\0') { BMDPRINTF(("loadrootmodules: obp_bootpath %s\n", obp_bootpath)); diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index f1edc47f0820..78cee8077715 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -815,34 +815,6 @@ ISCSITHDRS= \ ISOHDRS= \ signal_iso.h -DERIVED_LVMHDRS= \ - md_mdiox.h \ - md_basic.h \ - mdmed.h \ - md_mhdx.h \ - mdmn_commd.h - -LVMHDRS= \ - md_convert.h \ - md_crc.h \ - md_hotspares.h \ - md_mddb.h \ - md_mirror.h \ - md_mirror_shared.h \ - md_names.h \ - md_notify.h \ - md_raid.h \ - md_rename.h \ - md_sp.h \ - md_stripe.h \ - md_trans.h \ - mdio.h \ - mdvar.h - -ALL_LVMHDRS= \ - $(LVMHDRS) \ - $(DERIVED_LVMHDRS) - FMHDRS= \ protocol.h \ util.h @@ -1009,7 +981,6 @@ SYSEVENTHDRS= \ eventdefs.h \ ipmp.h \ pwrctl.h \ - svm.h \ vrrp.h CONTRACTHDRS= \ @@ -1184,7 +1155,6 @@ CHECKHDRS= \ $(FMFSHDRS:%.h=fm/fs/%.check) \ $(FMIOHDRS:%.h=fm/io/%.check) \ $(FSHDRS:%.h=fs/%.check) \ - $(LVMHDRS:%.h=lvm/%.check) \ $(SCSIHDRS:%.h=scsi/%.check) \ $(SCSIADHDRS:%.h=scsi/adapters/%.check) \ $(SCSICONFHDRS:%.h=scsi/conf/%.check) \ @@ -1250,7 +1220,6 @@ CHECKHDRS= \ $(ROOTTAVORHDRS) \ $(ROOTHERMONHDRS) \ $(ROOTMLNXHDRS) \ - $(ROOTLVMHDRS) \ $(ROOTSCSIHDRS) \ $(ROOTSCSIADHDRS) \ $(ROOTSCSICONFHDRS) \ @@ -1284,7 +1253,6 @@ CHECKHDRS= \ install_h: \ $(ROOTDIRS) \ - LVMDERIVED_H \ .WAIT \ $(ROOTHDRS) \ $(ROOTAUDHDRS) \ @@ -1319,7 +1287,6 @@ install_h: \ $(ROOTTAVORHDRS) \ $(ROOTHERMONHDRS) \ $(ROOTMLNXHDRS) \ - $(ROOTLVMHDRS) \ $(ROOTSCSIHDRS) \ $(ROOTSCSIADHDRS) \ $(ROOTSCSIISCSIHDRS) \ @@ -1361,14 +1328,10 @@ priv_names.h: $(PRIVS_AWK) $(PRIVS_DEF) usb/usbdevs.h: $(USBDEVS_AWK) $(USBDEVS_DATA) $(AWK) -f $(USBDEVS_AWK) $(USBDEVS_DATA) -H > $@ -LVMDERIVED_H: - cd $(SRC)/uts/common/sys/lvm; pwd; $(MAKE) all_h - clean: $(RM) $(GENHDRS) clobber: clean - cd $(SRC)/uts/common/sys/lvm; pwd; $(MAKE) clobber check: $(CHECKHDRS) diff --git a/usr/src/uts/common/sys/Makefile.syshdrs b/usr/src/uts/common/sys/Makefile.syshdrs index 578177f5297a..3d9bc82d1ac1 100644 --- a/usr/src/uts/common/sys/Makefile.syshdrs +++ b/usr/src/uts/common/sys/Makefile.syshdrs @@ -17,9 +17,12 @@ # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END +# + # # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2014 Garrett D'Amore +# Copyright 2016 Nexenta Systems, Inc. # # Common definitions for open and closed headers. @@ -101,9 +104,6 @@ idm/%.check: idm/%.h iscsit/%.check: iscsit/%.h $(DOT_H_CHECK) -lvm/%.check: lvm/%.h - $(DOT_H_CHECK) - scsi/%.check: scsi/%.h $(DOT_H_CHECK) @@ -213,7 +213,6 @@ ROOTDIRS= \ $(ROOTDIR)/ib/clients/of/sol_ucma \ $(ROOTDIR)/idm \ $(ROOTDIR)/iscsit \ - $(ROOTDIR)/lvm \ $(ROOTDIR)/scsi \ $(ROOTDIR)/scsi/conf \ $(ROOTDIR)/scsi/generic \ @@ -279,8 +278,6 @@ ROOTFMFSHDRS= $(FMFSHDRS:%=$(ROOTDIR)/fm/fs/%) ROOTFSHDRS= $(FSHDRS:%=$(ROOTDIR)/fs/%) -ROOTLVMHDRS= $(ALL_LVMHDRS:%=$(ROOTDIR)/lvm/%) - ROOTSCSIHDRS= $(SCSIHDRS:%=$(ROOTDIR)/scsi/%) ROOTSATAGENHDRS= $(SATAGENHDRS:%=$(ROOTDIR)/sata/%) ROOTSCSICONFHDRS= $(SCSICONFHDRS:%=$(ROOTDIR)/scsi/conf/%) @@ -415,9 +412,6 @@ $(ROOTDIR)/ib/adapters/hermon/%: ib/adapters/hermon/% $(ROOTDIR)/ib/adapters/%: ib/adapters/% $(INS.file) -$(ROOTDIR)/lvm/%: lvm/% - $(INS.file) - $(ROOTDIR)/scsi/%: scsi/% $(INS.file) diff --git a/usr/src/uts/common/sys/dkio.h b/usr/src/uts/common/sys/dkio.h index a5b0c312f9df..3d1a839164c3 100644 --- a/usr/src/uts/common/sys/dkio.h +++ b/usr/src/uts/common/sys/dkio.h @@ -81,7 +81,7 @@ struct dk_cinfo { #define DKC_SMSFLOPPY 12 #define DKC_SCSI_CCS 13 /* SCSI CCS compatible */ #define DKC_INTEL82072 14 /* native floppy chip */ -#define DKC_MD 16 /* meta-disk (virtual-disk) driver */ +#define DKC_MD 16 /* meta-disk (virtual-disk) driver (obsolete) */ #define DKC_INTEL82077 19 /* 82077 floppy disk controller */ #define DKC_DIRECT 20 /* Intel direct attached device i.e. IDE */ #define DKC_PCMCIA_MEM 21 /* PCMCIA memory disk-like type (Obsolete) */ diff --git a/usr/src/uts/common/sys/lvm/Makefile b/usr/src/uts/common/sys/lvm/Makefile deleted file mode 100644 index 5a21d577eabd..000000000000 --- a/usr/src/uts/common/sys/lvm/Makefile +++ /dev/null @@ -1,88 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright 2015 Igor Kozhukhov -# -# -# uts/common/sys/lvm/Makefile -# -# This makefile derive some .h files via rpcgen -# -# architecture independent -# - -UTSBASE = ../../.. - -include $(UTSBASE)/../Makefile.master - -.KEEP_STATE: - -DERIVED_FILES = \ - md_basic.h \ - mdmed.h \ - md_mdiox.h \ - md_mhdx.h \ - mdmn_commd.h - -RPCGENFLAGS += -C -M -D_KERNEL -DSYSV - - -def all install lint modlintlib clean.lint: $(DERIVED_FILES) -all_h install_h: $(DERIVED_FILES) - -clean: - $(RM) $(DERIVED_FILES) - -clobber: clean - -md_basic.h: meta_basic.x - $(RPCGEN) $(RPCGENFLAGS) -h meta_basic.x | \ - $(AWK) '// { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - // { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - { print $0 } \ - ' > $@ - -md_mhdx.h: mhdx.x - $(RPCGEN) $(RPCGENFLAGS) -h mhdx.x | \ - $(AWK) '// { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - // { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - { print $0 } \ - ' > $@ - -mdmed.h: metamed.x - $(RPCGEN) $(RPCGENFLAGS) -h metamed.x | \ - $(AWK) '// { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - // { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - { print $0 } \ - ' > $@ - -md_mdiox.h: mdiox.x - $(RPCGEN) $(RPCGENFLAGS) -h mdiox.x | \ - $(AWK) '// { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - // { print "#ifdef _REENTRANT"; print $$0; print "#endif\t/* _REENTRANT */"; next } \ - { print $0 } \ - ' > $@ - -mdmn_commd.h: mdmn_commd.x - $(RPCGEN) -h mdmn_commd.x > $@ diff --git a/usr/src/uts/common/sys/lvm/md_convert.h b/usr/src/uts/common/sys/lvm/md_convert.h deleted file mode 100644 index 17da34443002..000000000000 --- a/usr/src/uts/common/sys/lvm/md_convert.h +++ /dev/null @@ -1,400 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS__MD_CONVERT_H -#define _SYS__MD_CONVERT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -size_t get_big_stripe_req_size(ms_unit32_od_t *, int); -size_t get_small_stripe_req_size(ms_unit_t *, int); - -void stripe_convert(caddr_t, caddr_t, int); -void mirror_convert(caddr_t, caddr_t, int); -void raid_convert(caddr_t, caddr_t, int); -void hs_convert(caddr_t, caddr_t, int); -void hsp_convert(caddr_t, caddr_t, int); -void softpart_convert(caddr_t, caddr_t, int); -void trans_master_convert(caddr_t, caddr_t, int); -void trans_log_convert(caddr_t, caddr_t, int); - -extern void dump_mdc_unit(mdc_unit_t *); -extern void dump_mdc_unit32_od(mdc_unit32_od_t *); - -extern void dump_mm_unit(mm_unit_t *); -extern void dump_mm_unit32_od(mm_unit32_od_t *); - -extern void dump_ms_unit(ms_unit_t *); -extern void dump_ms_unit32_od(ms_unit32_od_t *); - -extern void dump_mr_unit(mr_unit_t *); -extern void dump_mr_unit32_od(mr_unit32_od_t *); - - -/* - * Nice debug printing macro: - * eg: HBDBG(stripe_convert, 0x%llx, msp->c.un_revision); - */ -#define HBDBG(r, f, v) printf(#r "," #v ":[" #f "]\n", v) - -/* Compacting a timeval64 to a timeval32 */ -#define CMPLTV(dest, source) \ - { \ - (dest).tv_sec = (int32_t)(source).tv_sec; \ - (dest).tv_usec = (int32_t)(source).tv_usec; \ - } - -/* Expanding a timeval32 to a timeval64 */ -#define EXPLTV(dest, source) \ - { \ - (dest).tv_sec = (long)(source).tv_sec; \ - (dest).tv_usec = (long)(source).tv_usec; \ - } - -#define COMPLETE_STRUCTURE 0 -#define FIRST_COMP_OFFSET 1 - -#define SMALL_2_BIG 1 -#define BIG_2_SMALL 2 - -/* Used by different types */ - -/* mdc_unit -> mdc_unit32_od */ -#define MDC_UNIT_BIG2SMALL(big_un, small_un) \ - small_un->c.un_revision = big_un->c.un_revision;\ - small_un->c.un_type = big_un->c.un_type;\ - small_un->c.un_status = big_un->c.un_status;\ - small_un->c.un_parent_res = big_un->c.un_parent_res;\ - small_un->c.un_child_res = big_un->c.un_child_res;\ - small_un->c.un_self_id = big_un->c.un_self_id;\ - small_un->c.un_record_id = big_un->c.un_record_id;\ - small_un->c.un_flag = big_un->c.un_flag;\ - small_un->c.un_total_blocks = (daddr32_t)big_un->c.un_total_blocks;\ - small_un->c.un_actual_tb = (daddr32_t)big_un->c.un_actual_tb;\ - small_un->c.un_nhead = (ushort_t)big_un->c.un_nhead;\ - small_un->c.un_nsect = (ushort_t)big_un->c.un_nsect;\ - small_un->c.un_rpm = big_un->c.un_rpm;\ - small_un->c.un_wr_reinstruct = big_un->c.un_wr_reinstruct;\ - small_un->c.un_rd_reinstruct = big_un->c.un_rd_reinstruct;\ - small_un->c.un_vtoc_id = big_un->c.un_vtoc_id;\ - small_un->c.un_capabilities = big_un->c.un_capabilities;\ - small_un->c.un_parent = big_un->c.un_parent;\ - small_un->c.un_user_flags = big_un->c.un_user_flags; - -#define MDC_UNIT_SMALL2BIG(small_un, big_un) \ - big_un->c.un_revision = small_un->c.un_revision;\ - big_un->c.un_type = small_un->c.un_type;\ - big_un->c.un_status = small_un->c.un_status;\ - big_un->c.un_parent_res = small_un->c.un_parent_res;\ - big_un->c.un_child_res = small_un->c.un_child_res;\ - big_un->c.un_self_id = small_un->c.un_self_id;\ - big_un->c.un_record_id = small_un->c.un_record_id;\ - big_un->c.un_flag = small_un->c.un_flag;\ - big_un->c.un_total_blocks = (diskaddr_t)small_un->c.un_total_blocks;\ - big_un->c.un_actual_tb = (diskaddr_t)small_un->c.un_actual_tb;\ - big_un->c.un_nhead = (uint_t)small_un->c.un_nhead;\ - big_un->c.un_nsect = (uint_t)small_un->c.un_nsect;\ - big_un->c.un_rpm = small_un->c.un_rpm;\ - big_un->c.un_wr_reinstruct = small_un->c.un_wr_reinstruct;\ - big_un->c.un_rd_reinstruct = small_un->c.un_rd_reinstruct;\ - big_un->c.un_vtoc_id = small_un->c.un_vtoc_id;\ - big_un->c.un_capabilities = small_un->c.un_capabilities;\ - big_un->c.un_parent = small_un->c.un_parent;\ - big_un->c.un_user_flags = small_un->c.un_user_flags; - -/* md_m_shared -> md_m_shared32_od */ -#define MMSH_BIG2SMALL(big_mdms, small_mdms) \ - small_mdms->ms_flags = big_mdms->ms_flags; \ - small_mdms->xms_mx[0] = 0; \ - small_mdms->xms_mx[1] = 0; \ - small_mdms->ms_state = big_mdms->ms_state; \ - small_mdms->ms_lasterrcnt = big_mdms->ms_lasterrcnt; \ - small_mdms->ms_orig_dev = md_cmpldev(big_mdms->ms_orig_dev); \ - small_mdms->ms_orig_blk = (daddr32_t)big_mdms->ms_orig_blk; \ - small_mdms->ms_hs_key = big_mdms->ms_hs_key; \ - small_mdms->ms_hs_id = big_mdms->ms_hs_id; \ - CMPLTV(small_mdms->ms_timestamp, big_mdms->ms_timestamp); - -/* mdc_unit32_od -> mdc_unit */ -/* md_m_shared32_od -> md_m_shared */ -#define MMSH_SMALL2BIG(small_mdms, big_mdms) \ - big_mdms->ms_flags = small_mdms->ms_flags; \ - big_mdms->ms_state = small_mdms->ms_state; \ - big_mdms->ms_lasterrcnt = small_mdms->ms_lasterrcnt; \ - big_mdms->ms_orig_dev = md_expldev(small_mdms->ms_orig_dev); \ - big_mdms->ms_orig_blk = (diskaddr_t)small_mdms->ms_orig_blk; \ - big_mdms->ms_hs_key = small_mdms->ms_hs_key; \ - big_mdms->ms_hs_id = small_mdms->ms_hs_id; \ - CMPLTV(big_mdms->ms_timestamp, small_mdms->ms_timestamp); - - -/* Used by Stripes */ - -/* ms_comp -> ms_comp32_od */ -#define MSCOMP_BIG2SMALL(big_mdcp, small_mdcp) \ - small_mdcp->un_key = big_mdcp->un_key; \ - small_mdcp->un_dev = md_cmpldev(big_mdcp->un_dev); \ - small_mdcp->un_start_block = (daddr32_t)big_mdcp->un_start_block; \ - MMSH_BIG2SMALL((&(big_mdcp->un_mirror)), (&(small_mdcp->un_mirror))); - -/* ms_comp32_od -> ms_comp */ -#define MSCOMP_SMALL2BIG(small_mdcp, big_mdcp) \ - big_mdcp->un_key = small_mdcp->un_key; \ - big_mdcp->un_dev = md_expldev(small_mdcp->un_dev); \ - big_mdcp->un_start_block = (diskaddr_t)small_mdcp->un_start_block; \ - MMSH_SMALL2BIG((&(small_mdcp->un_mirror)), (&(big_mdcp->un_mirror))); - - -/* ms_row -> ms_row32_od */ -#define MSROW_BIG2SMALL(big_mdr, small_mdr) \ - small_mdr->un_icomp = big_mdr->un_icomp; \ - small_mdr->un_ncomp = big_mdr->un_ncomp; \ - small_mdr->un_blocks = (daddr32_t)big_mdr->un_blocks; \ - small_mdr->un_cum_blocks = (daddr32_t)big_mdr->un_cum_blocks; \ - small_mdr->un_interlace = (daddr32_t)big_mdr->un_interlace; - -/* ms_row -> ms_row32_od */ -#define MSROW_SMALL2BIG(small_mdr, big_mdr) \ - big_mdr->un_icomp = small_mdr->un_icomp; \ - big_mdr->un_ncomp = small_mdr->un_ncomp; \ - big_mdr->un_blocks = (diskaddr_t)small_mdr->un_blocks; \ - big_mdr->un_cum_blocks = (diskaddr_t)small_mdr->un_cum_blocks; \ - big_mdr->un_interlace = (diskaddr_t)small_mdr->un_interlace; - - - -/* Used by Mirrors */ - -/* mm_submirror -> mm_submirror32_od */ -#define MMSM_BIG2SMALL(big_sm, small_sm) \ - small_sm->sm_key = big_sm->sm_key; \ - small_sm->sm_dev = md_cmpldev(big_sm->sm_dev); \ - small_sm->sm_state = big_sm->sm_state; \ - small_sm->sm_flags = big_sm->sm_flags; \ - small_sm->sm_hsp_id = big_sm->sm_hsp_id; \ - CMPLTV(small_sm->sm_timestamp, big_sm->sm_timestamp); \ - MMSH_BIG2SMALL((&(big_sm->sm_shared)), (&(small_sm->sm_shared))); - -/* mm_submirror32_od -> mm_submirror */ -#define MMSM_SMALL2BIG(small_sm, big_sm) \ - big_sm->sm_key = small_sm->sm_key; \ - big_sm->sm_dev = md_expldev(small_sm->sm_dev); \ - big_sm->sm_state = small_sm->sm_state; \ - big_sm->sm_flags = small_sm->sm_flags; \ - big_sm->sm_hsp_id = small_sm->sm_hsp_id; \ - CMPLTV(big_sm->sm_timestamp, small_sm->sm_timestamp); \ - MMSH_SMALL2BIG((&(small_sm->sm_shared)), (&(big_sm->sm_shared))); - - -/* Used by Raid */ -/* mr_column -> mr_column32_od */ -#define MRCOL_BIG2SMALL(big_rcol, small_rcol) \ - small_rcol->un_devstate = big_rcol->un_devstate; \ - small_rcol->un_devflags = big_rcol->un_devflags; \ - CMPLTV(small_rcol->un_devtimestamp, big_rcol->un_devtimestamp); \ - small_rcol->un_hs_id = big_rcol->un_hs_id; \ - small_rcol->un_hs_pwstart = (daddr32_t)big_rcol->un_hs_pwstart; \ - small_rcol->un_hs_devstart = (daddr32_t)big_rcol->un_hs_devstart; \ - small_rcol->un_hs_key = big_rcol->un_hs_key; \ - small_rcol->un_orig_dev = md_cmpldev(big_rcol->un_orig_dev); \ - small_rcol->un_orig_key = big_rcol->un_orig_key; \ - small_rcol->un_orig_pwstart = (daddr32_t)big_rcol->un_orig_pwstart;\ - small_rcol->un_orig_devstart = (daddr32_t)big_rcol->un_orig_devstart;\ - small_rcol->un_dev = md_cmpldev(big_rcol->un_dev); \ - small_rcol->un_pwstart = (daddr32_t)big_rcol->un_pwstart; \ - small_rcol->un_devstart = (daddr32_t)big_rcol->un_devstart; \ - small_rcol->un_alt_dev = md_cmpldev(big_rcol->un_alt_dev); \ - small_rcol->un_alt_pwstart = (daddr32_t)big_rcol->un_alt_pwstart; \ - small_rcol->un_alt_devstart = (daddr32_t)big_rcol->un_alt_devstart; - -/* mr_column32_od -> mr_column */ -#define MRCOL_SMALL2BIG(small_rcol, big_rcol) \ - big_rcol->un_devstate = small_rcol->un_devstate; \ - big_rcol->un_devflags = small_rcol->un_devflags; \ - CMPLTV(big_rcol->un_devtimestamp, small_rcol->un_devtimestamp); \ - big_rcol->un_hs_id = small_rcol->un_hs_id; \ - big_rcol->un_hs_pwstart = (diskaddr_t)small_rcol->un_hs_pwstart; \ - big_rcol->un_hs_devstart = (diskaddr_t)small_rcol->un_hs_devstart; \ - big_rcol->un_hs_key = small_rcol->un_hs_key; \ - big_rcol->un_orig_dev = md_expldev(small_rcol->un_orig_dev); \ - big_rcol->un_orig_key = small_rcol->un_orig_key; \ - big_rcol->un_orig_pwstart = (diskaddr_t)small_rcol->un_orig_pwstart; \ - big_rcol->un_orig_devstart = (diskaddr_t)small_rcol->un_orig_devstart;\ - big_rcol->un_dev = md_expldev(small_rcol->un_dev); \ - big_rcol->un_pwstart = (diskaddr_t)small_rcol->un_pwstart; \ - big_rcol->un_devstart = (diskaddr_t)small_rcol->un_devstart; \ - big_rcol->un_alt_dev = md_expldev(small_rcol->un_alt_dev); \ - big_rcol->un_alt_pwstart = (diskaddr_t)small_rcol->un_alt_pwstart; \ - big_rcol->un_alt_devstart = (diskaddr_t)small_rcol->un_alt_devstart; - -/* mr_unit -> mr_unit32_od */ -#define MRUNIT_BIG2SMALL(big_un, small_un) \ - MDC_UNIT_BIG2SMALL(big_un, small_un); \ - CMPLTV(small_un->un_timestamp, big_un->un_timestamp); \ - small_un->un_magic = big_un->un_magic; \ - small_un->un_state = big_un->un_state; \ - small_un->un_origcolumncnt = big_un->un_origcolumncnt; \ - small_un->un_totalcolumncnt = big_un->un_totalcolumncnt; \ - small_un->un_rflags = big_un->un_rflags; \ - small_un->un_segsize = big_un->un_segsize; \ - small_un->un_segsincolumn = (uint_t)big_un->un_segsincolumn;\ - small_un->un_maxio = big_un->un_maxio; \ - small_un->un_iosize = big_un->un_iosize; \ - small_un->un_linlck_flg = big_un->un_linlck_flg; \ - small_un->un_pwcnt = big_un->un_pwcnt; \ - small_un->un_pwsize = big_un->un_pwsize; \ - small_un->un_pwid = big_un->un_pwid; \ - small_un->un_percent_done = big_un->un_percent_done; \ - small_un->un_resync_copysize = big_un->un_resync_copysize; \ - small_un->un_hsp_id = big_un->un_hsp_id; - -/* mr_unit32_od -> mr_unit */ -#define MRUNIT_SMALL2BIG(small_un, big_un) \ - MDC_UNIT_SMALL2BIG(small_un, big_un); \ - CMPLTV(big_un->un_timestamp, small_un->un_timestamp); \ - big_un->un_magic = small_un->un_magic; \ - big_un->un_state = small_un->un_state; \ - big_un->un_origcolumncnt = small_un->un_origcolumncnt; \ - big_un->un_totalcolumncnt = small_un->un_totalcolumncnt; \ - big_un->un_rflags = small_un->un_rflags; \ - big_un->un_segsize = small_un->un_segsize; \ - big_un->un_segsincolumn = (diskaddr_t)small_un->un_segsincolumn;\ - big_un->un_maxio = small_un->un_maxio; \ - big_un->un_iosize = small_un->un_iosize; \ - big_un->un_linlck_flg = small_un->un_linlck_flg; \ - big_un->un_pwcnt = small_un->un_pwcnt; \ - big_un->un_pwsize = small_un->un_pwsize; \ - big_un->un_pwid = small_un->un_pwid; \ - big_un->un_percent_done = small_un->un_percent_done; \ - big_un->un_resync_copysize = small_un->un_resync_copysize; \ - big_un->un_hsp_id = small_un->un_hsp_id; - - -/* Used by Softpartitions */ -/* mp_unit -> mp_unit32_od */ -#define MPUNIT_BIG2SMALL(big_un, small_un) { \ - uint_t __i; \ - MDC_UNIT_BIG2SMALL(big_un, small_un); \ - small_un->un_key = big_un->un_key; \ - small_un->un_dev = md_cmpldev(big_un->un_dev); \ - small_un->un_start_blk = big_un->un_start_blk; \ - small_un->un_status = big_un->un_status; \ - small_un->un_numexts = big_un->un_numexts; \ - small_un->un_length = big_un->un_length; \ - for (__i = 0; __i < big_un->un_numexts; __i++) { \ - small_un->un_ext[__i].un_voff = big_un->un_ext[__i].un_voff; \ - small_un->un_ext[__i].un_poff = big_un->un_ext[__i].un_poff; \ - small_un->un_ext[__i].un_len = big_un->un_ext[__i].un_len; \ - } \ -} - -/* mp_unit32_od -> mp_unit */ -#define MPUNIT_SMALL2BIG(small_un, big_un) { \ - uint_t __j; \ - MDC_UNIT_BIG2SMALL(small_un, big_un); \ - big_un->un_key = small_un->un_key; \ - big_un->un_dev = md_expldev(small_un->un_dev); \ - big_un->un_start_blk = small_un->un_start_blk; \ - big_un->un_status = small_un->un_status; \ - big_un->un_numexts = small_un->un_numexts; \ - big_un->un_length = small_un->un_length; \ - for (__j = 0; __j < small_un->un_numexts; __j++) { \ - big_un->un_ext[__j].un_voff = small_un->un_ext[__j].un_voff; \ - big_un->un_ext[__j].un_poff = small_un->un_ext[__j].un_poff; \ - big_un->un_ext[__j].un_len = small_un->un_ext[__j].un_len; \ - } \ -} - - -/* Used by Hotspares */ -/* hot_spare -> hot_spare32_od */ -#define MHS_BIG2SMALL(big, small) \ - small->hs_revision = big->hs_revision; \ - small->hs_record_id = big->hs_record_id; \ - small->xx_hs_next = 0; \ - small->hs_devnum = md_cmpldev(big->hs_devnum); \ - small->hs_key = big->hs_key; \ - small->hs_start_blk = (daddr32_t)big->hs_start_blk; \ - small->hs_has_label = big->hs_has_label; \ - small->hs_number_blks = (daddr32_t)big->hs_number_blks; \ - small->hs_state = big->hs_state; \ - small->hs_refcount = big->hs_refcount; \ - small->hs_isopen = big->hs_isopen; \ - CMPLTV(small->hs_timestamp, big->hs_timestamp); - -/* hot_spare -> hot_spare32_od */ -#define MHS_SMALL2BIG(small, big) \ - big->hs_revision = small->hs_revision; \ - big->hs_record_id = small->hs_record_id; \ - big->hs_devnum = md_expldev(small->hs_devnum); \ - big->hs_key = small->hs_key; \ - big->hs_start_blk = (diskaddr_t)small->hs_start_blk; \ - big->hs_has_label = small->hs_has_label; \ - big->hs_number_blks = (diskaddr_t)small->hs_number_blks; \ - big->hs_state = small->hs_state; \ - big->hs_refcount = small->hs_refcount; \ - big->hs_isopen = small->hs_isopen; \ - CMPLTV(big->hs_timestamp, small->hs_timestamp); - -/* hot_spare_pool_ond -> hot_spare_pool_ond32 */ -#define MHSP_BIG2SMALL(big, small) { \ - int __i; \ - small->hsp_revision = big->hsp_revision; \ - small->hsp_self_id = big->hsp_self_id; \ - small->hsp_record_id = big->hsp_record_id; \ - small->hsp_refcount = big->hsp_refcount; \ - small->hsp_nhotspares = big->hsp_nhotspares; \ - for (__i = 0; __i < big->hsp_nhotspares; __i++) \ - small->hsp_hotspares[__i] = big->hsp_hotspares[__i]; \ -} - -/* hot_spare_pool_ond32 -> hot_spare_pool_ond */ -#define MHSP_SMALL2BIG(small, big) { \ - int __i; \ - big->hsp_revision = small->hsp_revision; \ - big->hsp_self_id = small->hsp_self_id; \ - big->hsp_record_id = small->hsp_record_id; \ - big->hsp_refcount = small->hsp_refcount; \ - big->hsp_nhotspares = small->hsp_nhotspares; \ - for (__i = 0; __i < small->hsp_nhotspares; __i++) \ - big->hsp_hotspares[__i] = small->hsp_hotspares[__i]; \ -} - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS__MD_CONVERT_H */ diff --git a/usr/src/uts/common/sys/lvm/md_crc.h b/usr/src/uts/common/sys/lvm/md_crc.h deleted file mode 100644 index 082ddd289c32..000000000000 --- a/usr/src/uts/common/sys/lvm/md_crc.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_CRC_H -#define _SYS_MD_CRC_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* md_crc.c */ -/* - * Structure to hold fields to be skipped when calculating the checksum - */ -typedef struct crc_skip { - struct crc_skip *skip_next; - int skip_offset; - int skip_size; -} crc_skip_t; - -extern uint_t crcfunc(uint_t check, - uchar_t *record, - uint_t *result, - size_t size, - crc_skip_t *skip); -extern void crcfreetab(void); - -/* - * The following crc defines allow for a number of areas to be skipped - * (not be included in the data being crc'd) in the record - * block (mddb_rb_32). These areas are the 12 byte area covering - * rb_checksum_fiddle, rb_private and rb_userdata - * - * In addition the skipped areas include the timestamps in the crc for - * MN disksets. - */ -#ifndef DEBUG -#define crcgen(record, result, size, skip) \ - (void) crcfunc(0, (uchar_t *)(record), (uint_t *)(result), \ - (size_t)(size), (crc_skip_t *)(skip)) - -#else /* DEBUG */ - -#ifdef _KERNEL -#define crcgen(record, result, size, skip) {\ - uint_t b = crcfunc(0, (uchar_t *)(record), (uint_t *)(result), \ - (size_t)(size), (crc_skip_t *)(skip)); \ - (void) crcfunc(0, (uchar_t *)(record), (uint_t *)(result), \ - (size_t)(size), (crc_skip_t *)(skip)); \ - ASSERT (*((uint_t *)(result)) == b); \ -} -#else /* !_KERNEL */ -#define crcgen(record, result, size, skip) {\ - uint_t b = crcfunc(0, (uchar_t *)(record), (uint_t *)(result), \ - (size_t)(size), (crc_skip_t *)(skip)); \ - (void) crcfunc(0, (uchar_t *)(record), (uint_t *)(result), \ - (size_t)(size), (crc_skip_t *)(skip)); \ - assert (*((uint_t *)(result)) == b); \ -} -#endif /* _KERNEL */ -#endif /* DEBUG */ - -#define crcchk(record, result, size, skip) crcfunc(1, (uchar_t *)(record), \ - (uint_t *)(result), (size_t)(size), (crc_skip_t *)(skip)) - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_CRC_H */ diff --git a/usr/src/uts/common/sys/lvm/md_hotspares.h b/usr/src/uts/common/sys/lvm/md_hotspares.h deleted file mode 100644 index 7e151c946e5d..000000000000 --- a/usr/src/uts/common/sys/lvm/md_hotspares.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_HOTSPARES_H -#define _SYS_MD_HOTSPARES_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * ioctl parameter structures - */ - -typedef enum set_hs_command { - ADD_HOT_SPARE, DELETE_HOT_SPARE, REPLACE_HOT_SPARE, FIX_HOT_SPARE -} set_hs_command_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct set_hs_params { - MD_DRIVER - md_error_t mde; /* error return */ - set_hs_command_t shs_cmd; /* ioctl command */ - hsp_t shs_hot_spare_pool; /* hsp identifier */ - md_dev64_t shs_component_old; /* dev for add, del, repl */ - md_dev64_t shs_component_new; /* new dev for repl */ - mdkey_t shs_key_old; /* key */ - mdkey_t shs_key_new; /* new key for repl */ - uint_t shs_options; /* see HS_OPT_* below */ - diskaddr_t shs_start_blk; /* used by add/repl */ - int shs_has_label; /* used by add/repl */ - diskaddr_t shs_number_blks; /* used by add/repl */ - int shs_size_option; /* big or small */ -} set_hs_params_t; - -#define HS_OPT_NONE 0x0000 /* Nothing special */ -#define HS_OPT_FORCE 0x0001 /* force flag */ -#define HS_OPT_POOL 0x0002 /* work on a hs pool */ -#define HS_OPT_DRYRUN 0x0004 /* just check if operation would be possible */ - -typedef struct get_hs_params { - MD_DRIVER - md_error_t mde; /* error return */ - mdkey_t ghs_key; /* hs name key */ - md_dev64_t ghs_devnum; /* returned hs dev_t */ - diskaddr_t ghs_start_blk; /* returned start blk */ - diskaddr_t ghs_number_blks; /* returned # of blks */ - hotspare_states_t ghs_state; /* returned state */ - md_timeval32_t ghs_timestamp; /* returned timestamp */ - uint_t ghs_revision; /* returned revision */ -} get_hs_params_t; - -typedef struct get_hsp { - hsp_t ghsp_id; /* hsp id */ - int ghsp_refcount; /* # metadevices using hsp */ - int ghsp_nhotspares; /* # of hs in hsp */ - mdkey_t ghsp_hs_keys[1]; /* array of keys */ -} get_hsp_t; - -#define MD_IOCSET_HS (MDIOC_MISC|0) -#define MD_IOCGET_HS (MDIOC_MISC|1) -#define HSP_REC 1 -#define HS_REC 2 - -/* - * Hot spare and hot spare pool data structures - * Note that hot_spare32_od is for old 32 bit format only - */ -typedef struct hot_spare32_od { - uint_t hs_revision; /* revision number */ - mddb_recid_t hs_record_id; /* db record id */ - caddr32_t xx_hs_next; /* hs list, link */ - dev32_t hs_devnum; /* hs device number */ - mdkey_t hs_key; /* namespace key */ - daddr32_t hs_start_blk; /* hs starting block */ - int hs_has_label; /* hs has a label */ - int hs_number_blks; /* hs # of blocks */ - hotspare_states_t hs_state; /* hs state */ - int hs_refcount; /* # hsp using the hs */ - int hs_isopen; /* is open flag */ - struct timeval32 hs_timestamp; /* time of last state change */ - /* - * Incore elements in this old format are not used by 64 bit kernel - * Comment out here for maintenance history - * struct hot_spare *hs_next; - */ -} hot_spare32_od_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* - * The pads are necessary for the hot_spare_t structure to be interpreted - * correctly in userland on the amd64 arch. - */ -typedef struct hot_spare { - uint_t hs_revision; /* revision number */ - mddb_recid_t hs_record_id; /* db record id */ - md_dev64_t hs_devnum; /* hs device number */ - mdkey_t hs_key; /* namespace key */ - int hs_pad1; - diskaddr_t hs_start_blk; /* hs starting block */ - int hs_has_label; /* hs has a label */ - int hs_pad2; - diskaddr_t hs_number_blks; /* hs # of blocks */ - hotspare_states_t hs_state; /* hs state */ - int hs_refcount; /* # hsp using the hs */ - int hs_isopen; /* is open flag */ - md_timeval32_t hs_timestamp; /* time of last state change */ - /* - * Incore elements. - * they should always be at the end of this data structure. - */ - struct hot_spare *hs_next; -} hot_spare_t; - -#define HS_ONDSK_STR_SIZE offsetof(hot_spare_t, hs_next) - - -/* - * Ondisk part of hot_spare_pool - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct hot_spare_pool_ond { - uint_t hsp_revision; - hsp_t hsp_self_id; - mddb_recid_t hsp_record_id; - uint32_t spare[4]; - int hsp_refcount; - int hsp_nhotspares; - mddb_recid_t hsp_hotspares[1]; -} hot_spare_pool_ond_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct hot_spare_pool { - /* - * incore only elements - */ - struct hot_spare_pool *hsp_next; /* hsp list, link */ - md_link_t hsp_link; /* next hsp (for IOCGET_NEXT) */ - - /* - * ondisk and should be the same as hot_spare_pool_ond - */ - uint_t hsp_revision; /* revision number */ - hsp_t hsp_self_id; /* hsp identifier */ - mddb_recid_t hsp_record_id; /* db record id */ - uint32_t spare[4]; - int hsp_refcount; /* # metadevices using hsp */ - int hsp_nhotspares; /* # hs in the pool */ - mddb_recid_t hsp_hotspares[1]; /* array of recid's */ -} hot_spare_pool_t; - -#define HSP_ONDSK_STR_OFF ((off_t)(&((hot_spare_pool_t *)0)->hsp_revision)) - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_HOTSPARES_H */ diff --git a/usr/src/uts/common/sys/lvm/md_mddb.h b/usr/src/uts/common/sys/lvm/md_mddb.h deleted file mode 100644 index 0668d8c023c6..000000000000 --- a/usr/src/uts/common/sys/lvm/md_mddb.h +++ /dev/null @@ -1,959 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_MDDB_H -#define _SYS_MD_MDDB_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if 0 /* DRP FOR DEBUGGING */ -#define MDDB_FAKE -#endif - -/* Private flags */ -#define MD_PRV_GOTIT 0x0001 /* Been snarfed */ -#define MD_PRV_DELETE 0x0002 /* Record pending to be deleted */ -#define MD_PRV_COMMIT 0x0004 /* Record pending to be commited */ -#define MD_PRV_CLEANUP 0x0008 /* Record pending to be cleaned up */ -#define MD_PRV_CONVD 0x0010 /* Record has been converted (32->64) */ -#define MD_PRV_PENDDEL (MD_PRV_GOTIT | MD_PRV_DELETE) -#define MD_PRV_PENDCOM (MD_PRV_GOTIT | MD_PRV_COMMIT) -#define MD_PRV_PENDCLEAN (MD_PRV_GOTIT | MD_PRV_CLEANUP) - - -#define MDDB_E_INVALID (-1) /* an invalid argument was passed */ -#define MDDB_E_EXISTS (-2) /* doing an operation a 2nd time which can */ - /* only be done once */ -#define MDDB_E_MASTER (-3) /* problem occurred accessing mastor block */ - /* returned from NEW_DEV */ -#define MDDB_E_TOOSMALL (-4) /* device is not large enough */ -#define MDDB_E_NORECORD (-5) /* record does not exits */ - /* - * returned from: mddb_getnextrec - * mddb_getrecsize - * mddb_commitrec - * mddb_commitrecs - * mddb_deleterec - */ -#define MDDB_E_NOSPACE (-6) /* no space to create record */ -#define MDDB_E_NOTNOW (-7) /* do not presently have enough resources */ - /* to perform requested operation */ -#define MDDB_E_NODB (-8) /* no database exist */ -#define MDDB_E_NOTOWNER (-9) /* have not been told to grab this set */ -#define MDDB_E_STALE (-10) /* database is stale */ -#define MDDB_E_TOOFEW (-11) /* not enough replicas available */ -#define MDDB_E_TAGDATA (-12) /* tagged data detected */ -#define MDDB_E_ACCOK (-13) /* 50/50 mode */ -#define MDDB_E_NTAGDATA (-14) /* tagop try, no tag data */ -#define MDDB_E_ACCNOTOK (-15) /* accop try, no accept possible */ -#define MDDB_E_NOLOCBLK (-16) /* No valid locators found */ -#define MDDB_E_NOLOCNMS (-17) /* No valid locator name information */ -#define MDDB_E_NODIRBLK (-18) /* No directory blocks found */ -#define MDDB_E_NOTAGREC (-19) /* No tag record blocks found */ -#define MDDB_E_NOTAG (-20) /* No matching tag record found */ -#define MDDB_E_NODEVID (-21) /* No device id found */ - -#define MDDB_MINBLKS 16 /* enough for a few metadevices */ -#define MDDB_MAXBLKS 8192 /* size of free bit map (must be / 8) */ -#define MDDB_MN_MINBLKS 32768 /* Multinode metadb minimum size */ - /* 16MB */ -#define MDDB_MN_MAXBLKS 524288 /* size of free bit map (must be / 8) */ - /* 256MB */ - -#define MDDB_C_STALE 0x0001 -#define MDDB_C_TOOFEW 0x0002 -#define MDDB_C_NOTOWNER 0x0004 -#define MDDB_C_SET_MN_STALE 0x0008 /* Set MN set to stale */ -#define MDDB_C_IMPORT 0x0010 - -/* - * Defines used to set/reset new master flag in set structure. - * Used during reconfig cycle to determine quickly if there is - * new master for the set. - */ -#define MDDB_NM_SET 0x0001 -#define MDDB_NM_RESET 0x0002 -#define MDDB_NM_GET 0x0004 - -/* Definitions of flag in Locator Block Device ID data area - mddb_did_info */ -#define MDDB_DID_EXISTS 0x0001 /* Device ID exists */ -#define MDDB_DID_VALID 0x0002 /* Device ID valid on current system */ -#define MDDB_DID_UPDATED 0x0004 /* locator/sidelocator info updated */ - -/* Definitions of flag in Locator Block - mddb_lb */ -#define MDDB_DEVID_STYLE 0x0001 /* Locator Block in Device ID format */ -#define MDDB_MNSET 0x0002 /* MDDB is for a multi-node set */ - - -#define MDDB_MAX_PATCH 25 /* number of locations that */ - /* can be patched in etc/system */ - -/* - * Set struct used by all parts of the driver, to store anchor pointers. - * - * Lock associated with field in this structure: - * - * Some of fields are accessible by both the single threaded ioctl thread - * and internal threads such as resync, hotsparing...etc. In this case - * additional protection is needed. For example, s_db is protected by - * s_dbmx additionally and s_un, s_ui are protected by md_unit_array_rw.lock - * s_nm, s_nmid, s_did_nm and s_did_nmid and s_dtp are protected by nm_lock - * Rest of other fileds are protected by md_mx. Two fields s_un_next and - * s_un_avail are introduced by the friendly name project and are ONLY - * accessible via a single threaded ioctl thread which already is protected - * by the ioctl lock and there is no need to add extra protection to them. - * However, in the future if they become accessible by other internal threads - * then an additional protection such as md_mx lock is highly recommended. - * - */ -typedef struct md_set { - uint_t s_status; /* set status */ - void **s_ui; /* set unit incore anchor */ - void **s_un; /* set unit anchor */ - void *s_hsp; /* set Hot Spare Pool anchor */ - void *s_hs; /* set Hot Spare anchor */ - void *s_db; /* set MDDB anchor */ - kmutex_t s_dbmx; /* set MDDB mutex */ - void *s_nm; /* set namespace anchor */ - mddb_recid_t s_nmid; /* set namespace anchor record */ - void *s_did_nm; /* set device id namespace anchor */ - mddb_recid_t s_did_nmid; /* set device id namespace anchor rec */ - void *s_dtp; /* set data tag rec */ - int s_am_i_master; /* incore master flag for this node */ - md_mn_nodeid_t s_nodeid; /* nodeid of this node - for MN sets */ - uint_t s_rcnt; /* incore resync count for set */ - unit_t s_un_next; /* s_un scan starts here */ - unit_t s_un_avail; /* number of avail slots */ -} md_set_t; - - -#define MDDB_MAGIC_MB 0x6d646d62 /* magic number for master blocks */ -#define MDDB_MAGIC_DB 0x6d646462 /* magic number for directory blocks */ -#define MDDB_MAGIC_RB 0x6d647262 /* magic number for record blocks */ -#define MDDB_MAGIC_LB 0x6d646c62 /* magic number for locator blocks */ -#define MDDB_MAGIC_LN 0x6d646c6e /* magic number for locator names */ -#define MDDB_MAGIC_DT 0x6d646474 /* magic number for data tag */ -#define MDDB_MAGIC_DI 0x6d646469 /* magic number for device ID block */ -#define MDDB_MAGIC_DU 0x6d646475 /* magic num for dummy mb */ -#define MDDB_MAGIC_DE 0x6d646465 /* magic num for mb devid */ - -#define MDDB_GLOBAL_XOR 1234567890 - -#define MDDB_REV_MAJOR (uint_t)0xff00 -#define MDDB_REV_MINOR (uint_t)0x00ff - -/* - * MDDB_REV_MNMB: - * If a MN diskset, master block revision is set to MDDB_REV_MNMB. - * Even though the master block structure is no different - * for a MN set, setting the revision field to a different - * number keeps any pre-MN_diskset code from accessing - * this diskset. It also allows for an early determination - * of a MN diskset when reading in from disk so that the - * proper size locator block and locator names structure - * can be read in thus saving time on diskset startup. - * Since no change in master block structure, the MDDB_REV_MINOR - * portion of the revision was incremented. - * - * MDDB_REV_MNLB: - * If a MN diskset, the locator block structure is a different size in - * order to accomodate up to MD_MNMAXSIDES nodes in a diskset - * with any nodeid (sideno) allowed. - * The revision is set to MDDB_REV_MNLB which is a change of the - * MDDB_REV_MAJOR portion of the revision. - * - * MDDB_REV_MNLN: - * If a MN diskset, the locator names is a different size in - * order to accomodate up to MD_MNMAXSIDES nodes in a diskset - * with any nodeid (sideno) allowed. - * The revision is set to MDDB_REV_MNLN which is a change of the - * MDDB_REV_MAJOR portion of the revision. - * - * The record blocks have two binary properties. A record block can - * represent either a 32 or 64 bit unit. A record block can also represent - * a traditionally named unit or a friendly named unit. Thus, there are - * minor revisions of record block. - * - * Traditional Friendly - * Name Name - * ----------- -------- - * 32 bit MDDB_REV_RB MDDB_REV_RBFN - * 64 bit MDDB_REV_RB64 MDDB_REV_RB64FN - */ - -#define MDDB_REV_MB (uint_t)0x0201 -#define MDDB_REV_MNMB (uint_t)0x0202 -#define MDDB_REV_DB (uint_t)0x0201 -#define MDDB_REV_LB (uint_t)0x0500 -#define MDDB_REV_MNLB (uint_t)0x0600 -#define MDDB_REV_LN (uint_t)0x0100 -#define MDDB_REV_MNLN (uint_t)0x0300 -#define MDDB_REV_RB (uint_t)0x0200 -#define MDDB_REV_RB64 (uint_t)0x0201 -#define MDDB_REV_RBFN (uint_t)0x0202 -#define MDDB_REV_RB64FN (uint_t)0x0203 -#define MDDB_REV_DT (uint_t)0x0100 -#define MDDB_REV_DI (uint_t)0x0100 - -/* - * Transfer record block friendly name status to unit/hs structure. - */ -#define MDDB_NOTE_FN(rbv, unv) switch (rbv) { \ - case MDDB_REV_RB: \ - case MDDB_REV_RB64: \ - unv &= ~MD_FN_META_DEV; \ - break; \ - case MDDB_REV_RBFN: \ - case MDDB_REV_RB64FN: \ - unv |= MD_FN_META_DEV; \ - break; \ - } - -#define MDDB_BSIZE (uint_t)DEV_BSIZE -#define MDDB_PREFIXCNT 10 -#define MDDB_DRVNMCNT 10 - -typedef int mddb_block_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct md_mnname_suffix { - md_name_suffix mn_ln_suffix; - uint_t mn_ln_sideno; -} md_mnname_suffix_t; - -typedef struct mddb_ln { - int ln_magic; - uint_t ln_revision; - uint_t ln_checksum; - struct timeval32 ln_timestamp; - md_name_prefix ln_prefixes[MDDB_PREFIXCNT]; - /* Don't change array sizes without changing RNDUP_BLKCNT */ - md_name_suffix ln_suffixes[MD_MAXSIDES][MDDB_NLB]; -} mddb_ln_t; - -/* - * Locator name structure for MN diskset. Same as for traditional - * and local diskset except that more sides are supported and the - * side number can be any number since the side number is stored - * in the ln_mnsuffixes structure instead of being used as an index - * into that array. This means that the whole array may need to be - * searched in order to find the correct information given a side number. - */ -typedef struct mddb_mnln { - int ln_magic; - uint_t ln_revision; - uint_t ln_checksum; - struct timeval32 ln_timestamp; - md_name_prefix ln_prefixes[MDDB_PREFIXCNT]; - /* Don't change array sizes without changing MDDB_MNLNCNT */ - md_mnname_suffix_t ln_mnsuffixes[MD_MNMAXSIDES][MDDB_NLB]; -} mddb_mnln_t; - -#define RNDUP_BLKCNT(sz, delta) (((sz) - \ - ((delta) * \ - ((MD_MAXSIDES - 1) * MDDB_NLB)) + \ - MDDB_BSIZE - 1) / MDDB_BSIZE) -#define MDDB_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), 0) -#define MDDB_LOCAL_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), \ - sizeof (md_name_suffix)) - -#define MDDB_MNLNCNT ((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) \ - / MDDB_BSIZE) - -typedef struct mddb_dt { - uint_t dt_mag; - uint_t dt_rev; - uint_t dt_cks; - mddb_dtag_t dt_dtag; -} mddb_dt_t; - -#define MDDB_DT_BYTES (roundup(sizeof (mddb_dt_t), MDDB_BSIZE)) -#define MDDB_DT_BLOCKS (btodb(MDDB_DT_BYTES)) - -typedef union identifier { - char serial[MDDB_SN_LEN]; - struct timeval32 createtime; -} identifier_t; - -typedef struct mddb_locator { - dev32_t l_dev; - daddr32_t l_blkno; - int l_flags; -} mddb_locator_t; - -typedef struct mddb_sidelocator { - uchar_t l_drvnm_index; - minor_t l_mnum; -} mddb_sidelocator_t; - -typedef struct mddb_mnsidelocator { - uchar_t mnl_drvnm_index; - minor_t mnl_mnum; - uint_t mnl_sideno; -} mddb_mnsidelocator_t; - -typedef struct mddb_drvnm { - uchar_t dn_len; - char dn_data[MD_MAXDRVNM]; -} mddb_drvnm_t; - -/* - * Locator Block Device ID Information - * Several device id's may share one disk block in an effort to - * conserve used replica space. - */ -typedef struct mddb_did_info { - uint_t info_flags; /* MDDB Device ID flags */ - uint_t info_firstblk; /* Device ID Start Block */ - uint_t info_blkcnt; /* Device ID Block Count */ - uint_t info_offset; /* Device ID offset w/i Block */ - uint_t info_length; /* Device ID Length */ - uint_t info_checksum; /* Device ID Checksum */ - char info_minor_name[32]; /* Minor name of lb dev */ -} mddb_did_info_t; - -typedef struct mddb_did_blk { - int blk_magic; /* used for verification */ - uint_t blk_revision; /* used for verification */ - int blk_checksum; /* used for verification */ - uint_t blk_commitcnt; /* matches LB's commitcnt */ - mddb_did_info_t blk_info[MDDB_NLB]; -} mddb_did_blk_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#define MDDB_DID_BYTES (roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE)) -#define MDDB_DID_BLOCKS (btodb(MDDB_DID_BYTES)) - -/* - * Device ID Disk Blocks. - * Incore linked list of disk blocks containing device IDs. - * The list is built when reading in the mddb_did_blk structure and - * when reading in the actual disk blocks containing device ids. - * This list is used to easily write out all disk blocks containing - * device ids. - */ -typedef struct mddb_did_db { - uint_t db_firstblk; /* Disk Block's logical addr */ - uint_t db_blkcnt; /* Contig Disk Block Count */ - caddr_t db_ptr; /* Ptr to incore Block(s) */ - struct mddb_did_db *db_next; /* Ptr to next in list */ -} mddb_did_db_t; - -/* - * Device ID Free List. - * Incore linked list of free space in disk blocks containing device IDs. - * Used to manage placement of device IDs in disk blocks. - * All disk blocks on free list are also in linked list of disk block - * containing device IDs (mddb_did_db_t). - */ -typedef struct mddb_did_free { - uint_t free_blk; /* Disk Block's logical addr */ - uint_t free_offset; /* offset of free space */ - uint_t free_length; /* length of free space */ - struct mddb_did_free *free_next; /* Ptr to next in list */ -} mddb_did_free_t; - -/* - * Device ID Incore Area - * Contains pointer to Device ID Disk Block list and - * Device ID Free List. - * Also contains incore array of pointers to device IDs. Pointers - * point into the device ID Disk Block list and are used as a - * shortcut to find incore device IDs. - */ -typedef struct mddb_did_ic { - mddb_did_blk_t *did_ic_blkp; - mddb_did_db_t *did_ic_dbp; - mddb_did_free_t *did_ic_freep; - ddi_devid_t did_ic_devid[MDDB_NLB]; /* Ptr to device IDs */ -} mddb_did_ic_t; - -/* - * Locator Block (LB): - * - Are fixed size, but the size is different - * for local/shared set db replicas. - * - All LB's start at logical block 0. - * - After a replica quorum is found, there is - * is only one incore copy of the LB. - * - LB's are only written when replicas are added, deleted, or errored. - * - LB's provide information about other replica's and their state. - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct mddb_lb { - int lb_magic; /* used for verification */ - uint_t lb_revision; /* used for verification */ - int lb_checksum; /* used for verification */ - uint_t lb_commitcnt; /* IMPORTANT */ - struct timeval32 lb_timestamp; /* informative only */ - int lb_loccnt; /* used for verification */ - identifier_t lb_ident; /* used for verification */ - uint_t lb_flags; /* flags describing LB */ - uint_t lb_spare[8]; /* Spare/Pad */ - mddb_block_t lb_didfirstblk; /* Devid Array Start Block */ - mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */ - mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */ - mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */ - struct timeval32 lb_inittime; /* creation of database */ - set_t lb_setno; /* used for verification */ - mddb_block_t lb_blkcnt; /* used for verification */ - mddb_block_t lb_lnfirstblk; - mddb_block_t lb_lnblkcnt; - mddb_block_t lb_dbfirstblk; - mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT]; - mddb_locator_t lb_locators[MDDB_NLB]; - /* Don't change array sizes without changing RNDUP_BLKCNT */ - mddb_sidelocator_t lb_sidelocators[MD_MAXSIDES][MDDB_NLB]; -} mddb_lb_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* - * Locator block structure for MN diskset. Same as for traditional - * and local diskset except that more sides are supported and the - * side number can be any number since the side number is stored - * in the lb_mnsidelocators structure instead of being used as an index - * into that array. This means that the whole array may need to be - * searched in order to find the correct information given a side number. - */ -typedef struct mddb_mnlb { - int lb_magic; /* used for verification */ - uint_t lb_revision; /* used for verification */ - int lb_checksum; /* used for verification */ - uint_t lb_commitcnt; /* IMPORTANT */ - struct timeval32 lb_timestamp; /* informative only */ - int lb_loccnt; /* used for verification */ - identifier_t lb_ident; /* used for verification */ - uint_t lb_flags; /* flags describing LB */ - uint_t lb_spare[8]; /* Spare/Pad */ - mddb_block_t lb_didfirstblk; /* Devid Array Start Block */ - mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */ - mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */ - mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */ - struct timeval32 lb_inittime; /* creation of database */ - set_t lb_setno; /* used for verification */ - mddb_block_t lb_blkcnt; /* used for verification */ - mddb_block_t lb_lnfirstblk; - mddb_block_t lb_lnblkcnt; - mddb_block_t lb_dbfirstblk; - mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT]; - mddb_locator_t lb_locators[MDDB_NLB]; - /* Don't change array sizes without changing MDDB_MNLBCNT */ - mddb_mnsidelocator_t lb_mnsidelocators[MD_MNMAXSIDES][MDDB_NLB]; -} mddb_mnlb_t; - - -#define MDDB_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), 0) -#define MDDB_LOCAL_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), \ - sizeof (mddb_sidelocator_t)) - -#define MDDB_MNLBCNT ((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \ - / MDDB_BSIZE) - -typedef struct mddb_map { - daddr32_t m_consecutive; - daddr32_t m_firstblk; -} mddb_map_t; - -/* - * Master block(s) (MB) - * - Are written by userland; Never by the driver! - * - Each replica has there own master blocks, - * the master block(s) are not shared. - * - MB's are not in the logical block address space of the database. - * - MB's are a fixed size record (MDDB_BSIZE) - * - MB's provide the logical to physical block translation, - * for their replica. - */ -typedef struct mddb_mb { - int mb_magic; /* used for verification */ - uint_t mb_revision; /* used for verification */ - uint_t mb_checksum; /* used for verification */ -#ifdef _LP64 - uint32_t mb_next; /* incore to next mb */ -#else - struct mddb_mb *mb_next; /* incore to next mb */ -#endif /* _LP64 */ - daddr32_t mb_nextblk; /* block # for next mb */ - md_timeval32_t mb_timestamp; /* timestamp */ - daddr32_t mb_blkcnt; /* size of blkmap */ - daddr32_t mb_blkno; /* physical loc. for this MB */ - set_t mb_setno; /* used for verification */ - struct timeval32 mb_setcreatetime; /* set creation timestamp */ - int spares[7]; - mddb_map_t mb_blkmap; /* logical->physical blk map */ - int mb_devid_magic; /* verify devid in mb */ - short mb_devid_len; /* len of following devid */ - char mb_devid[1]; /* devid byte array */ -} mddb_mb_t; - -/* - * In-core version of mddb_mb. It is known that the mddb_mb is 512 bytes on - * disk, really, and so this structure is 512 + sizeof(struct mddb_mb_ic *) - */ -#define MDDB_IC_BSIZE (MDDB_BSIZE + sizeof (struct mddb_mb_ic *)) -typedef struct mddb_mb_ic { - struct mddb_mb_ic *mbi_next; - struct mddb_mb mbi_mddb_mb; -} mddb_mb_ic_t; - - -/* - * there can be no address in record block. The checksum must - * stay the same where ever the record is in memory. Many - * things depend on this. Also the timestamp is the time the the - * record was committed not the time it was written to a particular - * device. - * - * Old definition of mddb_rb, for 32-bit apps and libraries - */ -typedef struct mddb_rb { - uint_t rb_magic; - uint_t rb_revision; - uint_t rb_checksum; - uint_t rb_checksum_fiddle; - uint_t rb_private; - void *rb_userdata; - uint_t rb_commitcnt; - uint_t rb_spare[1]; - struct timeval32 rb_timestamp; - int rb_data[1]; -} mddb_rb_t; - -/* This is, and always will be, the on-disk version of mddb_rb */ -typedef struct mddb_rb32 { - uint_t rb_magic; - uint_t rb_revision; - uint_t rb_checksum; - uint_t rb_checksum_fiddle; - uint_t rb_private; - uint32_t rb_userdata; - uint_t rb_commitcnt; - uint_t rb_spare[1]; - struct timeval32 rb_timestamp; - int rb_data[1]; -} mddb_rb32_t; - -/* - * directory entries - */ -typedef struct mddb_optinfo { - int o_li; - int o_flags; -} mddb_optinfo_t; - -/* Old definition of mddb_de, for 32-bit apps and libraries */ -typedef struct mddb_de { - struct mddb_de *de_next; - mddb_rb_t *de_rb; - mddb_recid_t de_recid; - mddb_type_t de_type1; - uint_t de_type2; - uint_t de_reqsize; - uint_t de_recsize; - mddb_block_t de_blkcount; - uint_t de_flags; - mddb_optinfo_t de_optinfo[2]; - mddb_block_t de_blks[1]; -} mddb_de_t; - -/* - * In core version of mddb_de, includes pointer for mddb_rb32_t user data - * mddb_rb32_t is used incore - */ -typedef struct mddb_de_ic { - void *de_rb_userdata; - void *de_rb_userdata_ic; - uint_t de_owner_nodeid; - struct mddb_de_ic *de_next; - mddb_rb32_t *de_rb; - mddb_recid_t de_recid; - mddb_type_t de_type1; - uint_t de_type2; - size_t de_reqsize; - size_t de_icreqsize; - size_t de_recsize; - uint_t de_blkcount; - uint_t de_flags; - mddb_optinfo_t de_optinfo[2]; - mddb_block_t de_blks[1]; -} mddb_de_ic_t; - -typedef struct mddb_db { - uint_t db_magic; - uint_t db_revision; - uint_t db_checksum; - mddb_block_t db_blknum; - struct mddb_db *db_next; - mddb_block_t db_nextblk; - struct timeval32 db_timestamp; - uint_t db_recsum; -#ifdef _KERNEL - mddb_de_ic_t *db_firstentry; -#else - mddb_de_t *db_firstentry; -#endif -} mddb_db_t; - -/* - * This is, and always will be, the on-disk version of mddb_de - * When mddb_de32 is read in it is converted into mddb_de_ic - */ -typedef struct mddb_de32 { - uint32_t de32_next; - uint32_t de32_rb; - mddb_recid_t de32_recid; - mddb_type_t de32_type1; - uint_t de32_type2; - uint_t de32_reqsize; - uint_t de32_recsize; - mddb_block_t de32_blkcount; - uint_t de32_flags; - mddb_optinfo_t de32_optinfo[2]; - mddb_block_t de32_blks[1]; -} mddb_de32_t; - -/* - * This is, and always will be, the on-disk version of mddb_db - * When mddb_db32 is read in it is converted into mddb_db - * To minimize impact on mddb format mddb_db fileds remain intact - */ -typedef struct mddb_db32 { - uint_t db32_magic; - uint_t db32_revision; - uint_t db32_checksum; - mddb_block_t db32_blknum; - uint32_t db32_next; - mddb_block_t db32_nextblk; - struct timeval32 db32_timestamp; - uint_t db32_recsum; - uint32_t db32_firstentry; -} mddb_db32_t; - -#define de32tode(from, to) \ - { \ - int i; \ - to->de_rb_userdata = NULL; \ - to->de_owner_nodeid = MD_MN_INVALID_NID; \ - to->de_next = (struct mddb_de_ic *)(uintptr_t)from->de32_next; \ - to->de_rb = (mddb_rb32_t *)(uintptr_t)from->de32_rb; \ - to->de_recid = from->de32_recid; \ - to->de_type1 = from->de32_type1; \ - to->de_type2 = from->de32_type2; \ - to->de_reqsize = from->de32_reqsize; \ - to->de_recsize = from->de32_recsize; \ - to->de_blkcount = from->de32_blkcount; \ - to->de_flags = from->de32_flags; \ - to->de_optinfo[0] = from->de32_optinfo[0]; \ - to->de_optinfo[1] = from->de32_optinfo[1]; \ - for (i = 0; i < from->de32_blkcount; i++) \ - to->de_blks[i] = from->de32_blks[i]; \ - } - -#define detode32(from, to) \ - { \ - int i; \ - to->de32_next = (uint32_t)(uintptr_t)from->de_next; \ - to->de32_rb = (uint32_t)(uintptr_t)from->de_rb; \ - to->de32_recid = from->de_recid; \ - to->de32_type1 = from->de_type1; \ - to->de32_type2 = from->de_type2; \ - to->de32_reqsize = from->de_reqsize; \ - to->de32_recsize = from->de_recsize; \ - to->de32_blkcount = from->de_blkcount; \ - to->de32_flags = from->de_flags; \ - to->de32_optinfo[0] = from->de_optinfo[0]; \ - to->de32_optinfo[1] = from->de_optinfo[1]; \ - for (i = 0; i < from->de_blkcount; i++) \ - to->de32_blks[i] = from->de_blks[i]; \ - } - -#define db32todb(from, to) \ - to->db_magic = from->db32_magic; \ - to->db_revision = from->db32_revision; \ - to->db_checksum = from->db32_checksum; \ - to->db_blknum = from->db32_blknum; \ - to->db_next = (struct mddb_db *)(uintptr_t)from->db32_next; \ - to->db_nextblk = from->db32_nextblk; \ - to->db_timestamp = from->db32_timestamp; \ - to->db_recsum = from->db32_recsum; \ - to->db_firstentry = (mddb_de_ic_t *)(uintptr_t)from->db32_firstentry; - -#define dbtodb32(from, to) \ - to->db32_magic = from->db_magic; \ - to->db32_revision = from->db_revision; \ - to->db32_checksum = from->db_checksum; \ - to->db32_blknum = from->db_blknum; \ - to->db32_next = (uint32_t)(uintptr_t)from->db_next; \ - to->db32_nextblk = from->db_nextblk; \ - to->db32_timestamp = from->db_timestamp; \ - to->db32_recsum = from->db_recsum; \ - to->db32_firstentry = (uint32_t)(uintptr_t)from->db_firstentry; - -/* - * information about a replica of the data base - */ -typedef struct mddb_ri { - struct mddb_ri *ri_next; - uint_t ri_flags; - uint_t ri_commitcnt; - int ri_transplant; - md_dev64_t ri_dev; - daddr32_t ri_blkno; - char ri_driver[16]; - mddb_mb_ic_t *ri_mbip; - mddb_lb_t *ri_lbp; - mddb_dt_t *ri_dtp; - mddb_did_ic_t *ri_did_icp; - ddi_devid_t ri_devid; - ddi_devid_t ri_old_devid; - char ri_minor_name[MDDB_MINOR_NAME_MAX]; - char ri_devname[MAXPATHLEN]; -} mddb_ri_t; - -typedef struct mddb_bf { - struct mddb_bf *bf_next; - mddb_locator_t *bf_locator; - buf_t bf_buf; -} mddb_bf_t; - -/* - * Information for sets of databases (which include replicas) - */ -#define MDDB_BITSRECID 31 -#define MDDB_SETSHIFT (MDDB_BITSRECID - MD_BITSSET) -#define MDDB_SETMASK (MD_SETMASK << MDDB_SETSHIFT) -#define MDDB_RECIDMASK ((1 << MDDB_SETSHIFT) - 1) - -#define DBSET(id) (((id) & MDDB_SETMASK) >> MDDB_SETSHIFT) -#define DBID(id) ((id) & MDDB_RECIDMASK) -#define MAKERECID(s, i) ((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \ - ((i) & MDDB_RECIDMASK)) - -#define MDDB_PARSE_LOCBLK 0x00000001 -#define MDDB_PARSE_LOCNM 0x00000002 -#define MDDB_PARSE_OPTRECS 0x00000004 -#define MDDB_PARSE_MASK 0x0000000F - - -#define MDDB_BLOCK_PARSE 0x00000001 /* Block sending parse msgs */ -#define MDDB_UNBLOCK_PARSE 0x00000002 /* Unblock sending parse msgs */ - -/* - * We need to keep s_ident and s_inittime 32 bit. They are used in mddb_lb - */ -typedef struct mddb_set { - uint_t s_setno; /* set number */ - uint_t s_sideno; /* side number */ - identifier_t s_ident; /* set identifier */ - char *s_setname; /* set name */ - mddb_mb_ic_t **s_mbiarray; /* master blocks array */ - mddb_db_t *s_dbp; /* directory block */ - mddb_lb_t *s_lbp; /* locator block */ - /* May be cast to mddb_mnlb_t */ - /* if accessing sidenames in */ - /* MN diskset */ - mddb_ln_t *s_lnp; /* locator names block */ - /* May be cast to mddb_mnln_t */ - /* if accessing sidenames in */ - /* MN diskset */ - mddb_dtag_lst_t *s_dtlp; /* List of data tags found */ - mddb_did_ic_t *s_did_icp; /* Device ID incore area */ - mddb_ri_t *s_rip; /* replicas incore list */ - int s_freeblkcnt; /* visable for test code */ - int s_totalblkcnt; /* visable for test code */ - int s_mn_parseflags; /* mddb parse flags for MNset */ - int s_mn_parseflags_sending; /* parse flgs sent to slaves */ - uchar_t *s_freebitmap; /* free blocks bitmap */ - uint_t s_freebitmapsize; /* size of bitmap */ - struct timeval32 s_inittime; /* timestamp set created */ - mddb_recid_t s_zombie; /* zombie record - createrec */ - int s_staledeletes; /* number of stale deleterec */ - int s_optcmtcnt; /* Following are opt. record */ - int s_opthavelck; /* bookkeeping records ... */ - int s_optwantlck; - kcondvar_t s_optwantlck_cv; - int s_optwaiterr; - int s_opthungerr; - kcondvar_t s_opthungerr_cv; - int s_opthavequeuinglck; - int s_optwantqueuinglck; - kcondvar_t s_optqueuing_cv; - ulong_t s_bufmisses; - mddb_bf_t *s_freebufhead; - int s_bufwakeup; - kcondvar_t s_buf_cv; - size_t s_databuffer_size; - void *s_databuffer; - int s_singlelockgotten; - int s_singlelockwanted; - kcondvar_t s_single_thread_cv; - md_hi_arr_t s_med; -} mddb_set_t; - -#ifndef MDDB_FAKE -#ifdef _KERNEL -/* md_mddb.c */ -extern uint_t mddb_lb_did_convert(mddb_set_t *, - uint_t, uint_t *); -extern void mddb_locatorblock2splitname(mddb_ln_t *, - int, side_t, md_splitname *); -extern int mddb_configure(mddb_cfgcmd_t, - struct mddb_config *); -extern mddb_recid_t mddb_getnextrec(mddb_recid_t, - mddb_type_t, uint_t); -extern int mddb_getoptloc(mddb_optloc_t *); -extern void *mddb_getrecaddr(mddb_recid_t); -extern void *mddb_getrecaddr_resize(mddb_recid_t, size_t, - off_t); -extern int mddb_getrecprivate(mddb_recid_t); -extern void mddb_setrecprivate(mddb_recid_t, uint_t); -extern mddb_de_ic_t *mddb_getrecdep(mddb_recid_t); -extern mddb_type_t mddb_getrectype1(mddb_recid_t); -extern int mddb_getrectype2(mddb_recid_t); -extern int mddb_getrecsize(mddb_recid_t); -extern int mddb_commitrec(mddb_recid_t); -extern int mddb_commitrecs(mddb_recid_t *); -extern int mddb_deleterec(mddb_recid_t); -extern mddb_recstatus_t mddb_getrecstatus(mddb_recid_t); -extern mddb_recid_t mddb_createrec(size_t usersize, - mddb_type_t type, uint_t type2, - md_create_rec_option_t option, set_t setno); -extern void mddb_init(void); -extern void mddb_unload(void); -extern void mddb_unload_set(set_t setno); -extern mddb_recid_t mddb_makerecid(set_t setno, mddb_recid_t id); -extern set_t mddb_getsetnum(mddb_recid_t id); -extern char *mddb_getsetname(set_t setno); -extern side_t mddb_getsidenum(set_t setno); -extern int mddb_ownset(set_t setno); -extern int getmed_ioctl(mddb_med_parm_t *medpp, int mode); -extern int setmed_ioctl(mddb_med_parm_t *medpp, int mode); -extern int updmed_ioctl(mddb_med_upd_parm_t *medpp, - int mode); -extern int take_set(mddb_config_t *cp, int mode); -extern int release_set(mddb_config_t *cp, int mode); -extern int gettag_ioctl(mddb_dtag_get_parm_t *dtgpp, - int mode); -extern int usetag_ioctl(mddb_dtag_use_parm_t *dtupp, - int mode); -extern int accept_ioctl(mddb_accept_parm_t *medpp, - int mode); -extern int md_update_locator_namespace(set_t setno, - side_t side, char *dname, char *pname, - md_dev64_t devt); -extern int mddb_validate_lb(set_t setno, int *rmaxsz); -extern int mddb_getinvlb_devid(set_t setno, int count, - int size, char **ctdptr); -extern int md_update_minor(set_t, side_t, mdkey_t); -extern int md_update_nm_rr_did_ioctl(mddb_config_t *cp); -extern int md_update_top_device_minor(set_t, side_t, - md_dev64_t); -#ifdef DEBUG -extern void mddb_check(void); -#endif /* DEBUG */ -#endif /* _KERNEL */ - -#else - -caddr_t mddb_fakeit; - -#define md_lb_did_convert(a, b, c) (0) -#define mddb_configure(a, b) (0) -#define mddb_getnextrec(a, b, c) ((mddb_recid_t)0) -#define mddb_getrecaddr(a) (mddb_fakeit) -#define mddb_getrecprivate(a) (0) -#define mddb_setrecprivate(a, b) (0) -#define mddb_getrectype1(a) (0) -#define mddb_getrectype2(a) (0) -#define mddb_getrecsize(a) (0) -#define mddb_commitrec(a) (0) -#define mddb_commitrecs(a) (0) -#define mddb_deleterec(a) (0) -#define mddb_getrecstatus(a) (MDDB_OK) -#define mddb_createrec(s, a, b) (0xffff & (int)(mddb_fakeit = \ - (caddr_t)kmem_zalloc(s, KM_SLEEP))) -#define mddb_unload() (0) - -#endif - -#define MDDB_NOSLEEP 1 -#define MDDB_SLEEPOK 0 - -#define MDDB_NOOLDOK 0x1 -#define MDDB_MUSTEXIST 0x2 -#define MDDB_NOINIT 0x4 -#define MDDB_MULTINODE 0x8 -#define MDDB_MN_STALE 0x10 /* MN set is stale */ - -/* Flags passed to selectreplicas - not a bit mask */ -#define MDDB_SCANALL 1 -#define MDDB_RETRYSCAN 0 -#define MDDB_SCANALLSYNC 2 /* During reconfig, sync up incore */ - /* and ondisk mddb by writing incore */ - /* values to disk. Don't write */ - /* change log records. */ - -/* Flags passed to writestart and writecopy */ -#define MDDB_WRITECOPY_ALL 1 /* Write all incore mddb to disk */ -#define MDDB_WRITECOPY_SYNC 2 /* Write incore mddb to disk except */ - /* - change log records */ - /* - optimized resync records */ - - -#define MDDB_PROBE 1 -#define MDDB_NOPROBE 0 - - -/* - * MN diskset definitions used to determine if a slave can write - * directly to the mddb. ONLY_MASTER only allows the master node - * to write to the mddb. ANY_NODE allows any node to write - * to the mddb. - */ -#define MDDB_WR_ONLY_MASTER 0 -#define MDDB_WR_ANY_NODE 1 - -#define MDDB_L_LOCKED 0x0001 /* this record is locked */ -#define MDDB_L_WANTED 0x0002 - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_MDDB_H */ diff --git a/usr/src/uts/common/sys/lvm/md_mirror.h b/usr/src/uts/common/sys/lvm/md_mirror.h deleted file mode 100644 index fc6bca9b07f2..000000000000 --- a/usr/src/uts/common/sys/lvm/md_mirror.h +++ /dev/null @@ -1,628 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_MIRROR_H -#define _SYS_MD_MIRROR_H - -#include -#include -#include -#include -#ifdef _KERNEL -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * following bits are used in status word in the common section - * of unit structure - */ -#define SMS_IS(sm, state) (((sm)->sm_state & (state)) != 0) -#define SMS_BY_INDEX_IS(un, index, state) \ - (((un)->un_sm[(index)].sm_state & (state)) != 0) - -#define SMS_BY_INDEX_IS_TARGET(un, index) \ - ((un)->un_sm[(index)].sm_flags & MD_SM_RESYNC_TARGET) - -#define SUBMIRROR_IS_READABLE(un, isubmirror) \ - ((((un)->un_sm[(isubmirror)].sm_state & SMS_IGNORE) == 0) && \ - ((un)->un_sm[(isubmirror)].sm_state & \ - (SMS_RUNNING | SMS_COMP_ERRED | SMS_COMP_RESYNC))) - -#define SUBMIRROR_IS_WRITEABLE(un, isubmirror) \ - ((un)->un_sm[(isubmirror)].sm_state & \ - (SMS_RUNNING | SMS_COMP_ERRED | SMS_COMP_RESYNC | \ - SMS_ATTACHED_RESYNC | SMS_OFFLINE_RESYNC)) - -/* - * Default resync block size for MN resync messages - */ -#define MD_DEF_RESYNC_BLK_SZ 8192 - -/* - * macro to test if the current block is within the current resync region - */ -#define IN_RESYNC_REGION(un, ps) \ - ((un->un_rs_prev_overlap != NULL) && (ps->ps_firstblk >= \ - un->un_rs_prev_overlap->ps_firstblk) && \ - (ps->ps_lastblk <= un->un_rs_prev_overlap->ps_lastblk)) -/* - * Default resync update interval (in minutes). - */ -#define MD_DEF_MIRROR_RESYNC_INTVL 5 - -/* - * Defines for flags argument in function set_sm_comp_state() - */ -#define MD_STATE_NO_XMIT 0x0000 /* Local action, (sent from master) */ -#define MD_STATE_XMIT 0x0001 /* Non-local action, send to master */ -#define MD_STATE_WMUPDATE 0x0002 /* Action because of watermark update */ -#define MD_STATE_OCHELD 0x0004 /* open/close lock held */ - -/* - * Defines for flags argument in function check_comp_4_hotspares() - */ -#define MD_HOTSPARE_NO_XMIT 0x0000 /* Local action, (sent from master) */ -#define MD_HOTSPARE_XMIT 0x0001 /* Non-local action, send to master */ -#define MD_HOTSPARE_WMUPDATE 0x0002 /* Action because of watermark update */ -#define MD_HOTSPARE_LINKHELD 0x0004 /* md_link_rw lock held */ - -/* - * Defines for argument in function send_mn_resync_done_message() - */ -#define RESYNC_ERR 0x1 -#define CLEAR_OPT_NOT_DONE 0x2 - -/* - * Defines for argument in function resync_read_blk_range() - */ -#define MD_FIRST_RESYNC_NEXT 0x1 -#define MD_SEND_MESS_XMIT 0x2 -#define MD_RESYNC_FLAG_ERR 0x4 - -/* - * Define for argument in function wait_for_overlaps() - */ -#define MD_OVERLAP_ALLOW_REPEAT 0x1 /* Allow if ps already in tree */ -#define MD_OVERLAP_NO_REPEAT 0 /* ps must not already be in tree */ - -/* - * Define for max retries of mirror_owner - */ -#define MD_OWNER_RETRIES 10 - -/* - * mm_submirror32_od and mm_unit32_od are used only for 32 bit old format - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct mm_submirror32_od { /* submirrors */ - mdkey_t sm_key; - dev32_t sm_dev; - sm_state_t sm_state; - sm_flags_t sm_flags; - caddr32_t xx_sm_shared_by_blk; /* really void *) */ - caddr32_t xx_sm_shared_by_indx; /* really void *) */ - caddr32_t xx_sm_get_component_count; - caddr32_t xx_sm_get_bcss; /* block count skip size */ - md_m_shared32_od_t sm_shared; /* used for mirroring plain devices */ - int sm_hsp_id; /* used for mirroring plain devices */ - struct timeval32 sm_timestamp; /* time of last state change */ -} mm_submirror32_od_t; - -typedef struct mm_submirror { /* submirrors */ - mdkey_t sm_key; - md_dev64_t sm_dev; /* 64 bit */ - sm_state_t sm_state; - sm_flags_t sm_flags; - md_m_shared_t sm_shared; /* used for mirroring plain devices */ - int sm_hsp_id; /* used for mirroring plain devices */ - md_timeval32_t sm_timestamp; /* time of last state change, 32 bit */ -} mm_submirror_t; - -typedef struct mm_unit32_od { - mdc_unit32_od_t c; /* common stuff */ - - int un_last_read; /* last submirror index read */ - uint_t un_changecnt; - ushort_t un_nsm; /* number of submirrors */ - mm_submirror32_od_t un_sm[NMIRROR]; - int un_overlap_tree_flag; - int xx_un_overlap_tree_mx[2]; /* replaces mutex */ - ushort_t xx_un_overlap_tree_cv; - caddr32_t xx_un_overlap_root; - mm_rd_opt_t un_read_option; /* mirror read option */ - mm_wr_opt_t un_write_option; /* mirror write option */ - mm_pass_num_t un_pass_num; /* resync pass number */ - /* - * following used to keep dirty bitmaps - */ - int xx_un_resync_mx[2]; /* replaces mutex */ - ushort_t xx_un_resync_cv; - uint_t un_resync_flg; - uint_t un_waiting_to_mark; - uint_t un_waiting_to_commit; - caddr32_t xx_un_outstanding_writes; /* outstanding write */ - caddr32_t xx_un_goingclean_bm; - caddr32_t xx_un_goingdirty_bm; - caddr32_t xx_un_dirty_bm; - caddr32_t xx_un_resync_bm; - uint_t un_rrd_blksize; /* The blocksize of the dirty bits */ - uint_t un_rrd_num; /* The number of resync regions */ - mddb_recid_t un_rr_dirty_recid; /* resync region bm record id */ - /* - * following stuff is private to resync process - */ - int un_rs_copysize; - int un_rs_dests; /* destinations */ - daddr32_t un_rs_resync_done; /* used for percent done */ - daddr32_t un_rs_resync_2_do; /* user for percent done */ - int un_rs_dropped_lock; - caddr32_t un_rs_type; /* type of resync in progress */ - /* - * Incore elements in this old structure are no longer referenced by - * current 64 bit kernel. Comment them out for maintenance purpose. - * - * mm_submirror_ic_t un_smic[NMIRROR]; - * kmutex_t un_ovrlap_chn_mx; - * kcondvar_t un_ovrlap_chn_cv; - * struct md_mps *un_ovrlap_chn; - * kmutex_t un_resync_mx; - * kcondvar_t un_resync_cv; - * short *un_outstanding_writes; - * uchar_t *un_goingclean_bm; - * uchar_t *un_goingdirty_bm; - * uchar_t *un_dirty_bm; - * uchar_t *un_resync_bm; - * char *un_rs_buffer; - */ -} mm_unit32_od_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* Types of resync in progress (used for un_rs_type) */ -#define MD_RS_NONE 0 /* No resync */ -#define MD_RS_OPTIMIZED 0x0001 /* Optimized resync */ -#define MD_RS_COMPONENT 0x0002 /* Component resync */ -#define MD_RS_SUBMIRROR 0x0003 /* Submirror resync */ -#define MD_RS_ABR 0x0004 /* Application based resync */ - -/* - * un_rs_type is split into the following bitfields: - * - * 0-3 Resync type (as above) - * 4-7 Submirror index [0..3] - * 8-31 Component index - */ -#define RS_TYPE_MASK 0xF -#define RS_SMI_MASK 0xF0 -#define RS_CI_MASK 0x1FFF00 - -#define RS_TYPE(x) ((x) & RS_TYPE_MASK) -#define RS_SMI(x) (((x) & RS_SMI_MASK) >> 4) -#define RS_CI(x) (((x) & RS_CI_MASK) >> 8) - -#define SET_RS_TYPE(x, v) { \ - (x) &= ~RS_TYPE_MASK; \ - (x) |= ((v) & RS_TYPE_MASK); \ - } -#define SET_RS_TYPE_NONE(x) { \ - (x) &= ~RS_TYPE_MASK; \ - } -#define SET_RS_SMI(x, v) { \ - (x) &= ~RS_SMI_MASK; \ - (x) |= (((v) << 4) & RS_SMI_MASK); \ - } -#define SET_RS_CI(x, v) { \ - (x) &= ~RS_CI_MASK; \ - (x) |= (((v) << 8) & RS_CI_MASK); \ - } - -typedef struct mm_submirror_ic { - intptr_t (*sm_shared_by_blk)(md_dev64_t, void *, - diskaddr_t, u_longlong_t *); - intptr_t (*sm_shared_by_indx)(md_dev64_t, void *, int); - int (*sm_get_component_count)(md_dev64_t, void *); - int (*sm_get_bcss)(md_dev64_t, void *, int, diskaddr_t *, - size_t *, u_longlong_t *, u_longlong_t *); -} mm_submirror_ic_t; - -typedef struct md_mps { - DAEMON_QUEUE - buf_t *ps_bp; - struct mm_unit *ps_un; - mdi_unit_t *ps_ui; - uint_t ps_childbflags; - caddr_t ps_addr; - diskaddr_t ps_firstblk; - diskaddr_t ps_lastblk; - uint_t ps_flags; - uint_t ps_allfrom_sm; /* entire read came from here */ - uint_t ps_writable_sm; - uint_t ps_current_sm; - uint_t ps_active_cnt; - int ps_frags; - uint_t ps_changecnt; - struct md_mps *ps_unused1; - struct md_mps *ps_unused2; - void (*ps_call)(); - kmutex_t ps_mx; - avl_node_t ps_overlap_node; -} md_mps_t; - -#define MD_MPS_ON_OVERLAP 0x0001 -#define MD_MPS_ERROR 0x0002 -#define MD_MPS_WRITE_AFTER_READ 0x0004 -#define MD_MPS_WOW 0x0008 -#define MD_MPS_DONTFREE 0x0010 -#define MD_MPS_DONE 0x0020 -#define MD_MPS_MAPPED 0x0040 /* re: MD_STR_MAPPED */ -#define MD_MPS_NOBLOCK 0x0080 /* re: MD_NOBLOCK */ -#define MD_MPS_ABR 0x0100 /* re: MD_STR_ABR */ -#define MD_MPS_DMR 0x0200 /* re: MD_STR_DMR */ -#define MD_MPS_WMUPDATE 0x0400 /* re: MD_STR_WMUPDATE */ -#define MD_MPS_DIRTY_RD 0x0800 /* re: MD_STR_DIRTY_RD */ -#define MD_MPS_RESYNC_READ 0x1000 -#define MD_MPS_FLAG_ERROR 0x2000 /* re: MD_STR_FLAG_ERR */ -#define MD_MPS_BLOCKABLE_IO 0x4000 /* re: MD_STR_BLOCK_OK */ - -#define MPS_FREE(kc, ps) \ -{ \ - if ((ps)->ps_flags & MD_MPS_DONTFREE) \ - (ps)->ps_flags |= MD_MPS_DONE; \ - else \ - kmem_cache_free((kc), (ps)); \ -} - -typedef struct md_mcs { - DAEMON_QUEUE - md_mps_t *cs_ps; - minor_t cs_mdunit; - /* Add new structure members HERE!! */ - buf_t cs_buf; - /* DO NOT add structure members here; cs_buf is dynamically sized */ -} md_mcs_t; - -typedef struct mm_mirror_ic { - kmutex_t un_overlap_tree_mx; - kcondvar_t un_overlap_tree_cv; - avl_tree_t un_overlap_root; - kmutex_t un_resync_mx; - kcondvar_t un_resync_cv; - short *un_outstanding_writes; /* outstanding write array */ - uchar_t *un_goingclean_bm; - uchar_t *un_goingdirty_bm; - uchar_t *un_dirty_bm; - uchar_t *un_resync_bm; - char *un_rs_buffer; - int un_suspend_wr_flag; - kmutex_t un_suspend_wr_mx; - kcondvar_t un_suspend_wr_cv; - md_mn_nodeid_t un_mirror_owner; /* Node which owns mirror */ - diskaddr_t un_resync_startbl; /* Start block for resync */ - kmutex_t un_owner_mx; /* Mutex for un_owner_state */ - uint_t un_owner_state; /* See below */ - uint_t un_mirror_owner_status; /* status for ioctl request */ - kmutex_t un_dmr_mx; /* mutex for DMR requests */ - kcondvar_t un_dmr_cv; /* condvar for DMR requests */ - int un_dmr_last_read; /* last DMR submirror read */ - callb_cpr_t un_rs_cprinfo; /* CPR info for resync thread */ - kmutex_t un_rs_cpr_mx; /* mutex for resync CPR info */ - kmutex_t un_prr_cpr_mx; /* mutex for prr CPR info */ - uint_t un_resync_completed; /* type of last resync */ - int un_abr_count; /* count of sp's with abr set */ - - uchar_t *un_pernode_dirty_bm[MD_MNMAXSIDES]; - uchar_t *un_pernode_dirty_sum; - - krwlock_t un_pernode_dirty_mx[MD_MNMAXSIDES]; - ushort_t un_rr_clean_start_bit; /* where to start next clean */ - -#ifdef _KERNEL - ddi_taskq_t *un_drl_task; /* deferred RR_CLEAN taskq */ -#else - void *un_drl_task; /* deferred RR_CLEAN taskq */ -#endif /* _KERNEL */ - uint_t un_waiting_to_clear; /* Blocked waiting to clear */ - -}mm_mirror_ic_t; - -#define MM_MN_OWNER_SENT 0x0001 /* RPC in progress */ -#define MM_MN_BECOME_OWNER 0x0002 /* Ownership change in prog. */ -#define MM_MN_PREVENT_CHANGE 0x0004 /* Disallow ownership change */ - -typedef struct mm_unit { - mdc_unit_t c; /* common stuff */ - - int un_last_read; /* last submirror index read */ - uint_t un_changecnt; - ushort_t un_nsm; /* number of submirrors */ - mm_submirror_t un_sm[NMIRROR]; - int un_overlap_tree_flag; - mm_rd_opt_t un_read_option; /* mirror read option */ - mm_wr_opt_t un_write_option; /* mirror write option */ - mm_pass_num_t un_pass_num; /* resync pass number */ - /* - * following used to keep dirty bitmaps - */ - uint_t un_resync_flg; - uint_t un_waiting_to_mark; - uint_t un_waiting_to_commit; - uint_t un_rrd_blksize; /* The blocksize of the dirty bits */ - uint_t un_rrd_num; /* The number of resync regions */ - mddb_recid_t un_rr_dirty_recid; /* resync region bm db record id */ - /* - * following stuff is private to resync process - */ - int un_rs_copysize; - int un_rs_dests; /* destinations */ - diskaddr_t un_rs_resync_done; /* used for percent done */ - diskaddr_t un_rs_resync_2_do; /* user for percent done */ - int un_rs_dropped_lock; - uint_t un_rs_type; /* type of resync */ - /* - * Incore only elements - */ - mm_submirror_ic_t un_smic[NMIRROR]; /* NMIRROR elements array */ - mm_mirror_ic_t un_mmic; - kmutex_t un_rrp_inflight_mx; - /* - * resync thread control - */ - kthread_t *un_rs_thread; /* Resync thread ID */ - kmutex_t un_rs_thread_mx; /* Thread cv mutex */ - kcondvar_t un_rs_thread_cv; /* Cond. Var. for thread */ - uint_t un_rs_thread_flags; /* Thread control flags */ - md_mps_t *un_rs_prev_overlap; /* existing overlap request */ - timeout_id_t un_rs_resync_to_id; /* resync progress timeout */ - kmutex_t un_rs_progress_mx; /* Resync progress mutex */ - kcondvar_t un_rs_progress_cv; /* Cond. Var. for progress */ - uint_t un_rs_progress_flags; /* Thread control flags */ - void *un_rs_msg; /* Intra-node resync message */ -} mm_unit_t; - -#define un_overlap_tree_mx un_mmic.un_overlap_tree_mx -#define un_overlap_tree_cv un_mmic.un_overlap_tree_cv -#define un_overlap_root un_mmic.un_overlap_root -#define un_resync_mx un_mmic.un_resync_mx -#define un_resync_cv un_mmic.un_resync_cv -#define un_outstanding_writes un_mmic.un_outstanding_writes -#define un_goingclean_bm un_mmic.un_goingclean_bm -#define un_goingdirty_bm un_mmic.un_goingdirty_bm -#define un_dirty_bm un_mmic.un_dirty_bm -#define un_resync_bm un_mmic.un_resync_bm -#define un_rs_buffer un_mmic.un_rs_buffer -#define un_suspend_wr_mx un_mmic.un_suspend_wr_mx -#define un_suspend_wr_cv un_mmic.un_suspend_wr_cv -#define un_suspend_wr_flag un_mmic.un_suspend_wr_flag -#define un_mirror_owner un_mmic.un_mirror_owner -#define un_resync_startbl un_mmic.un_resync_startbl -#define un_owner_mx un_mmic.un_owner_mx -#define un_owner_state un_mmic.un_owner_state -#define un_mirror_reqs un_mmic.un_mirror_reqs -#define un_mirror_reqs_done un_mmic.un_mirror_reqs_done -#define un_mirror_owner_status un_mmic.un_mirror_owner_status -#define un_dmr_mx un_mmic.un_dmr_mx -#define un_dmr_cv un_mmic.un_dmr_cv -#define un_dmr_last_read un_mmic.un_dmr_last_read -#define un_rs_cprinfo un_mmic.un_rs_cprinfo -#define un_rs_cpr_mx un_mmic.un_rs_cpr_mx -#define un_prr_cpr_mx un_mmic.un_prr_cpr_mx -#define un_resync_completed un_mmic.un_resync_completed -#define un_abr_count un_mmic.un_abr_count -#define un_pernode_dirty_bm un_mmic.un_pernode_dirty_bm -#define un_pernode_dirty_sum un_mmic.un_pernode_dirty_sum -#define un_pernode_dirty_mx un_mmic.un_pernode_dirty_mx -#define un_rr_clean_start_bit un_mmic.un_rr_clean_start_bit -#define un_drl_task un_mmic.un_drl_task -#define un_waiting_to_clear un_mmic.un_waiting_to_clear - -#define MM_RF_GATECLOSED 0x0001 -#define MM_RF_COMMIT_NEEDED 0x0002 -#define MM_RF_COMMITING 0x0004 -#define MM_RF_STALL_CLEAN (MM_RF_COMMITING | \ - MM_RF_COMMIT_NEEDED | \ - MM_RF_GATECLOSED) - - -#define MD_MN_MIRROR_UNOWNED 0 -#define MD_MN_MIRROR_OWNER(un) (un->un_mirror_owner == md_mn_mynode_id) -#define MD_MN_NO_MIRROR_OWNER(un) \ - (un->un_mirror_owner == MD_MN_MIRROR_UNOWNED) - -typedef struct err_comp { - struct err_comp *ec_next; - int ec_smi; - int ec_ci; -} err_comp_t; - -extern int md_min_rr_size; -extern int md_def_num_rr; - -/* Optimized resync records controllers */ -#define MD_MIN_RR_SIZE (md_min_rr_size) -#define MD_DEF_NUM_RR (md_def_num_rr) -#define MD_MAX_NUM_RR (4192*NBBY - sizeof (struct optim_resync)) - -/* default resync buffer size */ -#define MD_DEF_RESYNC_BUF_SIZE (1024) - -/* Structure for optimized resync records */ -#define OR_MAGIC 0xFECA /* Only missing the L */ -typedef struct optim_resync { - uint_t or_revision; - uint_t or_magic; - uint_t or_blksize; - uint_t or_num; - uchar_t or_rr[1]; -} optim_resync_t; - -/* Type 2 for mirror records */ -#define MIRROR_REC 1 -#define RESYNC_REC 2 - -#ifdef _KERNEL - -#define NO_SUBMIRRORS (0) -#define ALL_SUBMIRRORS (0xFFF) -#define SMI2BIT(smi) (1 << (smi)) - -/* For use with mirror_other_sources() */ -#define WHOLE_SM (-1) - -#define BLK_TO_RR(i, b, un) {\ - (i) = ((b) / ((un))->un_rrd_blksize); \ - if ((i) > ((un))->un_rrd_num) \ - { panic("md: BLK_TO_RR"); } \ -} - -#define RR_TO_BLK(b, i, un) \ - (b) = ((i) * ((un))->un_rrd_blksize) - -#define IS_GOING_DIRTY(i, un) (isset((un)->un_goingdirty_bm, (i))) -#define CLR_GOING_DIRTY(i, un) (clrbit((un)->un_goingdirty_bm, (i))) -#define SET_GOING_DIRTY(i, un) (setbit((un)->un_goingdirty_bm, (i))) - -#define IS_GOING_CLEAN(i, un) (isset((un)->un_goingclean_bm, (i))) -#define CLR_GOING_CLEAN(i, un) (clrbit((un)->un_goingclean_bm, (i))) -#define SET_GOING_CLEAN(i, un) (setbit((un)->un_goingclean_bm, (i))) - -#define IS_REGION_DIRTY(i, un) (isset((un)->un_dirty_bm, (i))) -#define CLR_REGION_DIRTY(i, un) (clrbit((un)->un_dirty_bm, (i))) -#define SET_REGION_DIRTY(i, un) (setbit((un)->un_dirty_bm, (i))) - -#define IS_KEEPDIRTY(i, un) (isset((un)->un_resync_bm, (i))) -#define CLR_KEEPDIRTY(i, un) (clrbit((un)->un_resync_bm, (i))) - -#define IS_PERNODE_DIRTY(n, i, un) \ - (isset((un)->un_pernode_dirty_bm[(n)-1], (i))) -#define CLR_PERNODE_DIRTY(n, i, un) \ - (clrbit((un)->un_pernode_dirty_bm[(n)-1], (i))) -#define SET_PERNODE_DIRTY(n, i, un) \ - (setbit((un)->un_pernode_dirty_bm[(n)-1], (i))) - -/* - * Write-On-Write handling. - * flags for md_mirror_wow_flg - * structure for quing copy-writes - * macros for relative locating of header and buffer - */ -#define WOW_DISABLE 0x0001 /* turn off WOW detection */ -#define WOW_PHYS_ENABLE 0x0020 /* turn on WOW for PHYS */ -#define WOW_LOGIT 0x0002 /* log non-disabled WOW detections */ -#define WOW_NOCOPY 0x0004 /* repeat normal write on WOW detection */ - -typedef struct wowhdr { - DAEMON_QUEUE - md_mps_t *wow_ps; - int wow_offset; -} wowhdr_t; - -#define WOWBUF_HDR(wowbuf) ((void *)(wowbuf-sizeof (wowhdr_t))) -#define WOWHDR_BUF(wowhdr) ((char *)wowhdr+sizeof (wowhdr_t)) - -/* - * Structure used to to save information about DMR reads. Used to save - * the count of all DMR reads and the timestamp of the last one executed. - * We declare a global with this structure and it can be read by a debugger to - * verify that the DMR ioctl has been executed and the number of times that it - * has been executed. - */ -typedef struct dmr_stats { - uint_t dmr_count; - struct timeval dmr_timestamp; -} dmr_stats_t; - -/* Externals from mirror.c */ -extern mddb_recid_t mirror_get_sm_unit(md_dev64_t); -extern void mirror_release_sm_unit(md_dev64_t); - -extern void mirror_set_sm_state(mm_submirror_t *, - mm_submirror_ic_t *, sm_state_t, int); - -extern void mirror_commit(mm_unit_t *, int, mddb_recid_t *); -extern int poke_hotspares(void); -extern void build_submirror(mm_unit_t *, int, int); -extern int mirror_build_incore(mm_unit_t *, int); -extern void reset_mirror(mm_unit_t *, minor_t, int); -extern int mirror_internal_open(minor_t, int, int, int, IOLOCK *); -extern int mirror_internal_close(minor_t, int, int, IOLOCK *); -extern void set_sm_comp_state(mm_unit_t *, int, int, int, - mddb_recid_t *, uint_t, IOLOCK *); -extern int mirror_other_sources(mm_unit_t *, int, int, int); -extern int mirror_resync_message(md_mn_rs_params_t *, IOLOCK *); -extern void md_mirror_strategy(buf_t *, int, void *); -extern int mirror_directed_read(dev_t, vol_directed_rd_t *, int); -extern void mirror_check_failfast(minor_t mnum); -extern int check_comp_4_hotspares(mm_unit_t *, int, int, uint_t, - mddb_recid_t, IOLOCK *); -extern void mirror_overlap_tree_remove(md_mps_t *ps); -extern void mirror_child_init(md_mcs_t *cs); - -/* Externals from mirror_ioctl.c */ -extern void reset_comp_states(mm_submirror_t *, - mm_submirror_ic_t *); -extern int mirror_grow_unit(mm_unit_t *un, md_error_t *ep); -extern int md_mirror_ioctl(dev_t dev, int cmd, void *data, - int mode, IOLOCK *lockp); -extern mm_unit_t *mirror_getun(minor_t, md_error_t *, int, IOLOCK *); -extern void mirror_get_status(mm_unit_t *un, IOLOCK *lockp); -extern int mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *); - -/* rename named service functions */ -md_ren_list_svc_t mirror_rename_listkids; -md_ren_svc_t mirror_rename_check; -md_ren_roleswap_svc_t mirror_renexch_update_kids; -md_ren_roleswap_svc_t mirror_exchange_parent_update_to; -md_ren_roleswap_svc_t mirror_exchange_self_update_from_down; - -/* Externals from mirror_resync.c */ -extern int unit_setup_resync(mm_unit_t *, int); -extern int mirror_resync_unit(minor_t mnum, md_resync_ioctl_t *ri, - md_error_t *ep, IOLOCK *); -extern int mirror_ioctl_resync(md_resync_ioctl_t *p, IOLOCK *); -extern int mirror_mark_resync_region(mm_unit_t *, diskaddr_t, - diskaddr_t, md_mn_nodeid_t); -extern void resync_start_timeout(set_t setno); -extern int mirror_resize_resync_regions(mm_unit_t *, diskaddr_t); -extern int mirror_add_resync_regions(mm_unit_t *, diskaddr_t); -extern int mirror_probedevs(md_probedev_t *, IOLOCK *); -extern void mirror_copy_rr(int, uchar_t *, uchar_t *); -extern void mirror_process_unit_resync(mm_unit_t *); -extern int mirror_set_dirty_rr(md_mn_rr_dirty_params_t *); -extern int mirror_set_clean_rr(md_mn_rr_clean_params_t *); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_MIRROR_H */ diff --git a/usr/src/uts/common/sys/lvm/md_mirror_shared.h b/usr/src/uts/common/sys/lvm/md_mirror_shared.h deleted file mode 100644 index 8cc50c48b39b..000000000000 --- a/usr/src/uts/common/sys/lvm/md_mirror_shared.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_MIRROR_SHARED_H -#define _SYS_MD_MIRROR_SHARED_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * md_m_shared32_od is part of old 32 bit format - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct md_m_shared32_od { - uint_t ms_flags; - uint_t xms_mx[2]; /* replaces kmutex_t ms_mx */ - /* - * The following are really private to the mirror code - * but are stored on a per component basic - */ - comp_state_t ms_state; /* component state */ - uint_t ms_lasterrcnt; - dev32_t ms_orig_dev; - daddr32_t ms_orig_blk; - mdkey_t ms_hs_key; - mddb_recid_t ms_hs_id; - struct timeval32 ms_timestamp; /* time of last state change */ -} md_m_shared32_od_t; - -typedef struct md_m_shared { - uint_t ms_flags; - /* - * The following are really private to the mirror code - * but are stored on a per component basic - */ - comp_state_t ms_state; /* component state */ - uint_t ms_lasterrcnt; - md_dev64_t ms_orig_dev; /* 64 bit */ - diskaddr_t ms_orig_blk; - mdkey_t ms_hs_key; - mddb_recid_t ms_hs_id; - md_timeval32_t ms_timestamp; /* time of last state change, 32 bit */ -} md_m_shared_t; - -#define MDM_S_NOWRITE 0x0001 -#define MDM_S_WRTERR 0x0002 -#define MDM_S_READERR 0x0004 -#define MDM_S_IOERR (MDM_S_WRTERR | MDM_S_READERR) -#define MDM_S_ISOPEN 0x0008 -#define MDM_S_RS_TRIED 0x0010 /* resync has tried this component */ -#define MDM_S_PROBEOPEN 0x0020 /* accessed via probe */ - -typedef struct ms_cd_info { - md_dev64_t cd_dev; - md_dev64_t cd_orig_dev; -} ms_cd_info_t; - -typedef struct ms_new_dev { - md_dev64_t nd_dev; - mdkey_t nd_key; - diskaddr_t nd_start_blk; - diskaddr_t nd_nblks; - int nd_labeled; - mddb_recid_t nd_hs_id; -} ms_new_dev_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_MIRROR_SHARED_H */ diff --git a/usr/src/uts/common/sys/lvm/md_names.h b/usr/src/uts/common/sys/lvm/md_names.h deleted file mode 100644 index 20b6d494d473..000000000000 --- a/usr/src/uts/common/sys/lvm/md_names.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _SYS_MD_NAMES_H -#define _SYS_MD_NAMES_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define NM_ALLOC_SIZE 512 -#define NM_DID_ALLOC_SIZE 1024 - -#define NM_NOCOMMIT 0x0100 -#define NM_SHARED 1 -#define NM_NOTSHARED 0 -#define NM_DEVID 0x0010 -#define NM_IMP_SHARED 0x0020 -#define NM_KEY_RECYCLE 0x0040 -#define NM_DEVID_VALID 1 -#define NM_DEVID_INVALID 0 - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -#ifdef _KERNEL -struct nm_rec_hdr { - uint_t r_revision; /* revision number */ - uint_t r_alloc_size; /* alloc'd record size */ - uint_t r_used_size; /* number bytes used */ - mddb_recid_t r_next_recid; /* record id of next record */ - uint32_t xr_next_rec; /* ptr to record, calc at boot */ - mdkey_t r_next_key; /* Next key for alloc'd entry */ -}; -#else /* ! _KERNEL */ -struct nm_rec_hdr { - uint_t r_revision; /* revision number */ - uint_t r_alloc_size; /* alloc'd record size */ - uint_t r_used_size; /* number bytes used */ - mddb_recid_t r_next_recid; /* record id of next record */ - void *r_next_rec; /* ptr to record, calc at boot */ - mdkey_t r_next_key; /* Next key for alloc'd entry */ -}; -#endif /* _KERNEL */ - -struct nm_next_hdr { - struct nm_next_hdr *nmn_nextp; - void *nmn_record; -}; - -struct nm_shr_rec { - struct nm_rec_hdr sr_rec_hdr; /* Record header */ - struct nm_shared_name { - mdkey_t sn_key; /* Unique key for this name */ - uint32_t sn_count; /* Count of users of this name */ - uint32_t sn_data; /* Data ptr for users (e.g., devops */ - /* sn_data NOT USED anywhere */ - ushort_t sn_namlen; /* Length of string in nmsn_name */ - char sn_name[1]; /* Driver/Directory name */ - } sr_name[1]; -}; - -#define SHR_NAMSIZ(n) \ - (((sizeof (struct nm_shared_name) - 1) + \ - (n)->sn_namlen + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) - -struct nm_rec { - struct nm_rec_hdr r_rec_hdr; /* Record header */ - struct nm_name { - side_t n_side; /* (key 1) side associated with */ - mdkey_t n_key; /* (key 2) allocated unique key */ - uint32_t n_count; /* reference count */ - minor_t n_minor; /* minor number of device */ - mdkey_t n_drv_key; /* Key of driver name in nm_shared */ - mdkey_t n_dir_key; /* Key of dir. name in nm_shared */ - ushort_t n_namlen; /* Length of string in nme_name */ - char n_name[1]; /* Filename of device is here */ - } r_name[1]; -}; - -#define NAMSIZ(n) \ - (((sizeof (struct nm_name) - 1) + \ - (n)->n_namlen + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) - -/* - * Device id support - */ -struct devid_shr_rec { - struct nm_rec_hdr did_rec_hdr; - struct did_shr_name { - mdkey_t did_key; - uint32_t did_count; - uint32_t did_data; - ushort_t did_size; - char did_devid[1]; - } device_id[1]; -}; - -#define DID_SHR_NAMSIZ(n) \ - (((sizeof (struct did_shr_name) - 1) + \ - (n)->did_size + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) - - -struct devid_min_rec { - struct nm_rec_hdr min_rec_hdr; - struct did_min_name { - side_t min_side; - mdkey_t min_key; - uint32_t min_count; - mdkey_t min_devid_key; - ushort_t min_namlen; - char min_name[1]; - } minor_name[1]; -}; - -#define DID_NAMSIZ(n) \ - (((sizeof (struct did_min_name) - 1) + \ - (n)->min_namlen + (sizeof (uint_t) - 1)) & ~(sizeof (uint_t) - 1)) - - -struct nm_header { - uint_t h_revision; /* revision number */ - struct nm_rec_hdr h_names; /* device-name structures */ - struct nm_rec_hdr h_shared; /* shared structures */ -}; - -struct nm_header_hdr { - struct nm_header *hh_header; - struct nm_next_hdr hh_names; - struct nm_next_hdr hh_shared; -}; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_NAMES_H */ diff --git a/usr/src/uts/common/sys/lvm/md_notify.h b/usr/src/uts/common/sys/lvm/md_notify.h deleted file mode 100644 index 67071260aef3..000000000000 --- a/usr/src/uts/common/sys/lvm/md_notify.h +++ /dev/null @@ -1,250 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_NOTIFY_H -#define _SYS_MD_NOTIFY_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define MD_EVENT_ID (0xda1eab1e) -#define MD_ALLSETS ((ulong_t)0xffffffff) -#define MD_ALLDEVS ((ulong_t)0xffffffff) -#define MD_NOTIFY_HALT_TRIES (4) -#define MD_NOTIFY_NAME_SIZE (64) -#define MD_NOTIFY_REVISION 1 - -/* mdn_flags bits in struct md_event_queue */ - -#define MD_EVENT_QUEUE_DESTROY (0x00000001) -#define MD_EVENT_QUEUE_INVALID (0x00000002) -#define MD_EVENT_QUEUE_PERM (0x00000004) /* do not delete when proc dies */ -#define MD_EVENT_QUEUE_FULL (0x00000008) - -typedef enum md_event_type { - - EQ_EMPTY = 0, - - /* Configuration Changes */ - EQ_CREATE = 1, - EQ_DELETE, - EQ_ADD, - EQ_REMOVE, - EQ_REPLACE, - EQ_GROW, - EQ_RENAME_SRC, - EQ_RENAME_DST, - - EQ_MEDIATOR_ADD, - EQ_MEDIATOR_DELETE, - EQ_HOST_ADD, - EQ_HOST_DELETE, - EQ_DRIVE_ADD, - EQ_DRIVE_DELETE, - - /* State Changes */ - EQ_INIT_START = 0x00000400, - EQ_INIT_FAILED, - EQ_INIT_FATAL, - EQ_INIT_SUCCESS, - EQ_IOERR, - EQ_ERRED, - EQ_LASTERRED, - EQ_OK, - EQ_ENABLE, - EQ_RESYNC_START, - EQ_RESYNC_FAILED, - EQ_RESYNC_SUCCESS, /* resync has succeeded */ - EQ_RESYNC_DONE, /* resync completed */ - EQ_HOTSPARED, /* hot spare aquired for use */ - EQ_HS_FREED, /* hotspare no longer in use */ - EQ_HS_CHANGED, /* change of metadevice hotspare pool */ - EQ_TAKEOVER, - EQ_RELEASE, - EQ_OPEN_FAIL, - EQ_OFFLINE, - EQ_ONLINE, - EQ_DETACH, - EQ_DETACHING, - EQ_ATTACH, - EQ_ATTACHING, - EQ_CHANGE, - EQ_EXCHANGE, - EQ_REGEN_START, - EQ_REGEN_DONE, - EQ_REGEN_FAILED, - - /* User defined event */ - EQ_USER = 0x00100000, - - /* Notify Specfic */ - EQ_NOTIFY_LOST, - EQ_LAST } - md_event_type_t; - -typedef enum md_event_cmds { - EQ_NONE = 0x00000000, - EQ_ON = 0x00000001, - EQ_OFF = 0x00000002, - EQ_GET_NOWAIT = 0x00000010, - EQ_GET_WAIT = 0x00000040, - EQ_PUT = 0x00000020, - - EQ_ALLVALID = 0x00000073 - }md_event_cmds_t; - -typedef enum md_tags { - TAG_EMPTY, - TAG_METADEVICE, - TAG_REPLICA, - TAG_HSP, - TAG_HS, - TAG_SET, - TAG_DRIVE, - TAG_HOST, - TAG_MEDIATOR, - TAG_UNK, - TAG_LAST -} md_tags_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct md_event_ioctl { - MD_DRIVER - md_error_t mde; /* error return */ - uint_t mdn_magic; /* magic number for structure */ - int mdn_rev; /* revision number */ - char mdn_name[MD_NOTIFY_NAME_SIZE]; - /* queue name */ - int mdn_flags; /* ioctl flags */ - md_event_cmds_t mdn_cmd; /* command value */ - md_tags_t mdn_tag; /* object tag */ - set_t mdn_set; /* set number */ - md_dev64_t mdn_dev; /* device event occurred on */ - md_event_type_t mdn_event; /* event */ - u_longlong_t mdn_user; /* user defined event */ - md_timeval32_t mdn_time; /* time stamp of event */ -} md_event_ioctl_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* ioctl flags */ -#define EQ_Q_PERM (0x00000001) /* do not delete when proc dies */ - -#define EQ_Q_ALLVALID (0x00000001) /* all valid notify ioctl flags */ - -typedef enum notify_cmds_t - { EQ_LIST_ADD = 1, EQ_LIST_DELETE } - notify_cmds_t; - -typedef struct notify { - notify_cmds_t mdn_cmd; /* list function (add/delete) */ - md_tags_t mdn_tag; /* type of object */ - set_t mdn_set; /* set where event occurred */ - md_dev64_t mdn_dev; /* device that event occurred on */ - md_event_type_t mdn_event; /* event */ -}notify_t; - - -#ifdef _KERNEL - -#define NOTIFY_MD(tag, set, dev, event) \ - (void) md_notify_interface(EQ_PUT, (tag), (set), (dev), (event)) - -#define SE_NOTIFY(se_class, se_subclass, tag, set, dev) \ - svm_gen_sysevent((se_class), (se_subclass), (tag), (set), (dev)) - -typedef struct md_event { - struct md_event *mdn_next; /* pointer to next element */ - md_tags_t mdn_tag; /* object type */ - set_t mdn_set; /* set where event occurred */ - md_dev64_t mdn_dev; /* device that event occurred on */ - md_event_type_t mdn_event; /* event */ - u_longlong_t mdn_user; /* user defined event */ - struct timeval mdn_time; /* time stamp of event */ -}md_event_t; - -typedef struct md_event_queue { - struct md_event_queue *mdn_nextq; /* next event queue */ - char mdn_name[MD_NOTIFY_NAME_SIZE]; - /* queue name */ - int mdn_flags; /* queue flags */ - pid_t mdn_pid; /* pid that created the queue */ - proc_t *mdn_proc; /* process that created the queue */ - uid_t mdn_uid; /* uid of queue creator */ - size_t mdn_size; /* size of the queue in elements */ - md_event_t *mdn_front; /* front element in queue */ - md_event_t *mdn_tail; /* last element of queue */ - int mdn_waiting; /* number of process waiting */ - kcondvar_t mdn_cv; /* waiting condition varaible */ -} md_event_queue_t; - -/* - * The remainder of this file defines items that are used for testing - * md_notify. - */ - -/* - * Named services for testing - */ - -#define MD_NOTIFY_REAP_OFF "notify turn reap off" -#define MD_NOTIFY_REAP_ON "notify turn reap on" -#define MD_NOTIFY_TEST_STATS "notify test statistics" - -/* - * The MD_NOTIFY_TEST_STATS named service can be invoked to get md_notify - * to set the values of this structure. The md_tnotify module uses this - * structure. - */ - -typedef struct md_notify_stats { - kmutex_t *mds_eventq_mx; /* Address of mutex protecting */ - /* event queue. */ - int mds_max_queue; /* Max. # events in notify queue. */ - int mds_reap; /* events since last reap. */ - int mds_reap_count; /* # events between reaps. */ - int mds_reap_off; /* non-zero -> reaping is off. */ -} md_notify_stats_t; - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_NOTIFY_H */ diff --git a/usr/src/uts/common/sys/lvm/md_raid.h b/usr/src/uts/common/sys/lvm/md_raid.h deleted file mode 100644 index ef73e0d43459..000000000000 --- a/usr/src/uts/common/sys/lvm/md_raid.h +++ /dev/null @@ -1,688 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_RAID_H -#define _SYS_MD_RAID_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -/* - * following bits are used in status word in the common section - * of unit structure: un_status - */ -#define RAID_UNMAGIC 0xBADBABE0 -#define RAID_PSMAGIC 0xBADBABE1 -#define RAID_CSMAGIC 0xBADBABE2 -#define RAID_PWMAGIC 0xBADBABE3 -#define RAID_BUFMAGIC 0xBADBABE4 -/* - * These are the major constants for the definition of a raid device - */ -#define PWCNT_MIN 10 /* mininum # prewrites */ -#define PWCNT_MAX 100 /* maximum # prewrites */ -#define RAID_MIN_INTERLACE (DEV_BSIZE * 2) - -#define UNIT_STATE(un) ((un)->un_state) -#define COLUMN_STATE(un, column) ((un)->un_column[(column)].un_devstate) - -#define COLUMN_STATE_ONLY(un, column) (\ - ((un)->un_column[(column)].un_devstate == RCS_INIT) || \ - ((un)->un_column[(column)].un_devstate == RCS_OKAY) || \ - ((un)->un_column[(column)].un_devstate == RCS_ERRED) || \ - ((un)->un_column[(column)].un_devstate == RCS_RESYNC) || \ - ((un)->un_column[(column)].un_devstate == RCS_LAST_ERRED) || \ - ((un)->un_column[(column)].un_devstate == RCS_REGEN)) - -#define COLUMN_ISUP(un, column) (\ - ((un)->un_column[(column)].un_devstate == RCS_OKAY) || \ - ((un)->un_column[(column)].un_devstate == RCS_RESYNC) || \ - ((un)->un_column[(column)].un_devstate == RCS_LAST_ERRED)) - -#define COLUMN_ISOKAY(un, column) (\ - ((un)->un_column[(column)].un_devstate == RCS_OKAY)) - -#define COLUMN_ISLASTERR(un, column) (\ - ((un)->un_column[(column)].un_devstate == RCS_LAST_ERRED)) - -#define WRITE_ALT(un, column) ( \ - ((un)->un_column[(column)].un_alt_dev != NODEV64) && \ - (((un)->un_column[(column)].un_devflags & MD_RAID_WRITE_ALT))) - -#define HOTSPARED(un, column) ( \ - ((un)->un_column[(column)].un_hs_id != 0)) - -#define OVERLAPED(blk1, lblk1, blk2, lblk2) ( \ - (((blk1 > lblk2) ? 1 : 0) || \ - ((lblk1 < blk2) ? 1 : 0))) - - -/* - * Note: magic is needed only to set rpw_magic, not rpw_magic_ext! - */ -#define RAID_FILLIN_RPW(buf, un, sum, colnum, \ - blkno, blkcnt, id, \ - colcount, col, magic) { \ - if ((un)->c.un_revision & MD_64BIT_META_DEV) { \ - raid_pwhdr_t *rpw64 = (raid_pwhdr_t *)(void *)(buf);\ - rpw64->rpw_magic = magic; \ - rpw64->rpw_sum = sum; \ - rpw64->rpw_columnnum = colnum; \ - rpw64->rpw_blkno = (diskaddr_t)blkno; \ - rpw64->rpw_blkcnt = blkcnt; \ - rpw64->rpw_id = id; \ - rpw64->rpw_colcount = colcount; \ - rpw64->rpw_column = col; \ - rpw64->rpw_unit = MD_SID(un); \ - rpw64->rpw_magic_ext = RAID_PWMAGIC; \ - rpw64->rpw_origcolumncnt = (un)->un_origcolumncnt; \ - rpw64->rpw_totalcolumncnt = (un)->un_totalcolumncnt; \ - rpw64->rpw_segsize = (un)->un_segsize; \ - rpw64->rpw_segsincolumn = (diskaddr_t)((un)->un_segsincolumn);\ - rpw64->rpw_pwcnt = (un)->un_pwcnt; \ - rpw64->rpw_pwsize = (un)->un_pwsize; \ - rpw64->rpw_devstart = \ - (diskaddr_t)((un)->un_column[col].un_orig_devstart);\ - rpw64->rpw_pwstart = \ - (diskaddr_t)((un)->un_column[col].un_orig_pwstart);\ - } else { \ - raid_pwhdr32_od_t *rpw32 = \ - (raid_pwhdr32_od_t *)(void *)(buf); \ - rpw32->rpw_magic = magic; \ - rpw32->rpw_sum = sum; \ - rpw32->rpw_columnnum = colnum; \ - rpw32->rpw_blkno = (daddr_t)blkno; \ - rpw32->rpw_blkcnt = blkcnt; \ - rpw32->rpw_id = id; \ - rpw32->rpw_colcount = colcount; \ - rpw32->rpw_column = col; \ - rpw32->rpw_unit = MD_SID(un); \ - rpw32->rpw_magic_ext = RAID_PWMAGIC; \ - rpw32->rpw_origcolumncnt = (un)->un_origcolumncnt; \ - rpw32->rpw_totalcolumncnt = (un)->un_totalcolumncnt; \ - rpw32->rpw_segsize = (daddr_t)((un)->un_segsize); \ - rpw32->rpw_segsincolumn = (daddr_t)((un)->un_segsincolumn);\ - rpw32->rpw_pwcnt = (un)->un_pwcnt; \ - rpw32->rpw_pwsize = (un)->un_pwsize; \ - rpw32->rpw_devstart = \ - (daddr_t)((un)->un_column[col].un_orig_devstart);\ - rpw32->rpw_pwstart = \ - (daddr_t)((un)->un_column[col].un_orig_pwstart);\ - } \ -} - -#define RAID_CONVERT_RPW(rpw32, rpw64) { \ - (rpw64)->rpw_magic = (rpw32)->rpw_magic; \ - (rpw64)->rpw_sum = (rpw32)->rpw_sum; \ - (rpw64)->rpw_columnnum = (rpw32)->rpw_columnnum; \ - (rpw64)->rpw_blkno = (rpw32)->rpw_blkno; \ - (rpw64)->rpw_blkcnt = (rpw32)->rpw_blkcnt; \ - (rpw64)->rpw_id = (rpw32)->rpw_id; \ - (rpw64)->rpw_colcount = (rpw32)->rpw_colcount; \ - (rpw64)->rpw_column = (rpw32)->rpw_column; \ - (rpw64)->rpw_unit = (rpw32)->rpw_unit; \ - (rpw64)->rpw_magic_ext = (rpw32)->rpw_magic_ext; \ - (rpw64)->rpw_origcolumncnt = (rpw32)->rpw_origcolumncnt; \ - (rpw64)->rpw_totalcolumncnt = (rpw32)->rpw_totalcolumncnt; \ - (rpw64)->rpw_segsize = (rpw32)->rpw_segsize; \ - (rpw64)->rpw_segsincolumn = (rpw32)->rpw_segsincolumn; \ - (rpw64)->rpw_pwcnt = (rpw32)->rpw_pwcnt; \ - (rpw64)->rpw_pwsize = (rpw32)->rpw_pwsize; \ - (rpw64)->rpw_devstart = (rpw32)->rpw_devstart; \ - (rpw64)->rpw_pwstart = (rpw32)->rpw_pwstart; \ -} - -typedef struct mr_scoreboard { - int sb_column; - int sb_flags; - diskaddr_t sb_start_blk; - diskaddr_t sb_last_blk; - void *sb_cs; -} mr_scoreboard_t; - -#define SB_AVAIL (0x00000001) /* useable and valid blocks */ -#define SB_INUSE (0x00000002) /* being used */ -#define SB_UNUSED (0x00000004) /* useable and no valid blocks */ -#define SB_INVAL_PEND (0x00000008) /* being invalidated */ - -typedef struct mr_pw_reserve { - uint_t pw_magic; - int pw_column; - int pw_free; - mr_scoreboard_t pw_sb[1]; -} mr_pw_reserve_t; - - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct mr_column { - rcs_state_t un_devstate; - rcs_flags_t un_devflags; - md_timeval32_t un_devtimestamp; /* time of last state change, 32 bit */ - - mddb_recid_t un_hs_id; - diskaddr_t un_hs_pwstart; - diskaddr_t un_hs_devstart; - mdkey_t un_hs_key; - - - md_dev64_t un_orig_dev; /* original device, 64 bit */ - mdkey_t un_orig_key; - diskaddr_t un_orig_pwstart; - diskaddr_t un_orig_devstart; - - md_dev64_t un_dev; /* current read/write dev */ - diskaddr_t un_pwstart; - diskaddr_t un_devstart; - - md_dev64_t un_alt_dev; /* write to if resync */ - diskaddr_t un_alt_pwstart; - diskaddr_t un_alt_devstart; -} mr_column_t; - -/* - * mr_column32_od is for old 32 bit format only - */ -typedef struct mr_column32_od { - rcs_state_t un_devstate; - rcs_flags_t un_devflags; - struct timeval32 un_devtimestamp; /* time of last state change */ - caddr32_t xx_un_pw_reserve; - - mddb_recid_t un_hs_id; - daddr32_t un_hs_pwstart; - daddr32_t un_hs_devstart; - mdkey_t un_hs_key; - - dev32_t un_orig_dev; /* original device */ - mdkey_t un_orig_key; - daddr32_t un_orig_pwstart; - daddr32_t un_orig_devstart; - - dev32_t un_dev; /* current read/write dev */ - daddr32_t un_pwstart; - daddr32_t un_devstart; - - dev32_t un_alt_dev; /* write to if resync */ - daddr32_t un_alt_pwstart; - daddr32_t un_alt_devstart; -} mr_column32_od_t; - - -/* - * Incore only elements structures - */ -typedef struct mr_column_ic { - mr_pw_reserve_t *un_pw_reserve; -} mr_column_ic_t; - -/* - * Do not rearrange elements as mutexes must be aligned on - * an 8 byte boundary. Element _t_un_linlck_mx corresponds to - * _t_un_linlck_cv and element _t_un_mx corresponds to _t_un_cv - */ -typedef struct mr_unit_ic { - caddr_t _t_un_pbuffer; - caddr_t _t_un_dbuffer; - struct md_raidcs *_t_un_linlck_chn; - kmutex_t _t_un_linlck_mx; - kmutex_t _t_un_mx; - kcondvar_t _t_un_linlck_cv; - kcondvar_t _t_un_cv; - mr_column_ic_t *_t_un_column_ic; -} mr_unit_ic_t; - -typedef struct mr_unit { - mdc_unit_t c; - int un_raid_res; - uint_t un_magic; - rus_state_t un_state; - md_timeval32_t un_timestamp; /* 32 bit fixed size */ - uint_t un_origcolumncnt; - uint_t un_totalcolumncnt; - uint_t un_rflags; - uint_t un_segsize; - diskaddr_t un_segsincolumn; - uint_t un_maxio; /* in blks */ - uint_t un_iosize; /* in blks */ - uint_t un_linlck_flg; - uint_t un_pwcnt; - uint_t un_pwsize; - long long un_pwid; - uint_t un_percent_done; - uint_t un_resync_copysize; /* in blks */ - hsp_t un_hsp_id; - /* - * This union has to begin at an 8 byte aligned address. - * If not, this structure has different sizes in 32 / 64 bit - * environments, since in a 64 bit environment the compiler - * adds paddings before a long long, if it doesn't start at an 8byte - * aligned address. - * Be careful if you add or remove structure elements before it! - */ - - union { - struct { - diskaddr_t _t_un_resync_line_index; - uint_t _t_un_resync_segment; - int _t_un_resync_index; - } _resync; - struct { - diskaddr_t _t_un_grow_tb; - uint_t _t_un_init_colcnt; - u_longlong_t _t_un_init_iocnt; - } _init; - } _t_un; - - /* - * This union has to begin at an 8 byte aligned address. - * Be careful if you add or remove structure elements before it! - */ - union { - mr_unit_ic_t *_mr_ic; - uint_t _mr_ic_pad[2]; - } un_mr_ic; - - mr_column_t un_column[1]; -} mr_unit_t; - -#define mr_ic un_mr_ic._mr_ic -#define un_pbuffer mr_ic->_t_un_pbuffer -#define un_dbuffer mr_ic->_t_un_dbuffer -#define un_linlck_chn mr_ic->_t_un_linlck_chn -#define un_linlck_mx mr_ic->_t_un_linlck_mx -#define un_linlck_cv mr_ic->_t_un_linlck_cv -#define un_mx mr_ic->_t_un_mx -#define un_cv mr_ic->_t_un_cv -#define un_column_ic mr_ic->_t_un_column_ic - -/* - * For old 32 bit format use only - */ -typedef struct mr_unit32_od { - mdc_unit32_od_t c; - caddr32_t xx_un_raid_res; - uint_t un_magic; - rus_state_t un_state; - struct timeval32 un_timestamp; - uint_t un_origcolumncnt; - uint_t un_totalcolumncnt; - uint_t un_rflags; - uint_t un_segsize; - uint_t un_segsincolumn; - uint_t un_maxio; - uint_t un_iosize; - caddr32_t xx_un_pbuffer; - caddr32_t xx_un_dbuffer; - uint_t un_linlck_flg; - caddr32_t xx_un_linlck_chn; - uint_t un_pwcnt; - uint_t un_pwsize; - long long un_pwid; - uint_t un_rebuild_size; - uint_t un_percent_done; - union { - struct { - uint_t _t_un_resync_segment; - int _t_un_resync_index; - uint_t _t_un_resync_line_index; - } _resync; - struct { - daddr32_t _t_un_grow_tb; - uint_t _t_un_init_colcnt; - uint_t _t_un_init_iocnt; - } _init; - } _t_un; - uint_t un_resync_copysize; - - /* - * This spot is 8 byte aligned!!! - * Don't change this arrangement. - */ - union { - struct { - mr_unit_ic_t *_t_mr_ic; - } _mric; - struct { - uint_t xx_un_linlck_mx[2]; - } _lckmx; - } _unic; - - short xx_un_linlck_cv; - int xx_un_mx[2]; - short xx_un_cv; - hsp_t un_hsp_id; - mr_column32_od_t un_column[1]; -} mr_unit32_od_t; - -typedef struct raid_pwhdr { - uint_t rpw_magic; - uint_t rpw_sum; - int rpw_columnnum; - diskaddr_t rpw_blkno; - uint_t rpw_blkcnt; - long long rpw_id; - uint_t rpw_colcount; - uint_t rpw_column; - uint_t rpw_unit; - uint_t rpw_magic_ext; - uint_t rpw_origcolumncnt; - uint_t rpw_totalcolumncnt; - uint_t rpw_segsize; - diskaddr_t rpw_segsincolumn; - uint_t rpw_pwcnt; - uint_t rpw_pwsize; - diskaddr_t rpw_devstart; - diskaddr_t rpw_pwstart; - char rpw_filler[12]; -} raid_pwhdr_t; - -/* - * For old 32 bit pre-write area - */ -typedef struct raid_pwhdr32_od { - uint_t rpw_magic; - uint_t rpw_sum; - int rpw_columnnum; - daddr32_t rpw_blkno; - daddr32_t rpw_blkcnt; - long long rpw_id; - uint_t rpw_colcount; - uint_t rpw_column; - uint_t rpw_unit; - uint_t rpw_magic_ext; - uint_t rpw_origcolumncnt; - uint_t rpw_totalcolumncnt; - uint_t rpw_segsize; - uint_t rpw_segsincolumn; - uint_t rpw_pwcnt; - uint_t rpw_pwsize; - uint_t rpw_devstart; - uint_t rpw_pwstart; - rus_state_t rpw_unit_state; - rcs_state_t rpw_next_column_state; - rcs_state_t rpw_prev_column_state; -} raid_pwhdr32_od_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#ifdef _KERNEL - -/* - * the buffer header is only bp_mapin if it is needed. It is needed on - * all writes and on some reads. ps_mapin is non zero if the buffer is - * maped in. ps_mapin_mx protect ps_mapin. The protocol for usage is - * - * 1) check for non-zero and continue if non-zero - * 2) aquire the ps_mapin_mx - * 3) recheck for non-zero and continue if non-zero - * 4) bp_mapin - * 5) set ps_mapin to non-zero - * 6) drop ps_mapin_mx - * - * the reason for this is to avoid the mutex when possible. - */ -typedef struct md_raidps { /* raid parent save */ - DAEMON_QUEUE - uint_t ps_magic; - mr_unit_t *ps_un; - mdi_unit_t *ps_ui; - buf_t *ps_bp; - caddr_t ps_addr; - int ps_flags; - int ps_error; - int ps_frags; - int ps_pwfrags; - int ps_mapin; /* buffer maped in if non zero */ - kmutex_t ps_mx; - kmutex_t ps_mapin_mx; /* protects ps_mapin */ -} md_raidps_t; - -/* flags for parent save area */ - -#define MD_RPS_ERROR 0x0001 -#define MD_RPS_READ 0x0020 -#define MD_RPS_WRITE 0x0040 -#define MD_RPS_DONE 0x0080 -#define MD_RPS_INUSE 0x0100 -#define MD_RPS_IODONE 0x0200 -#define MD_RPS_HSREQ 0x0400 - -/* - * used in cs_state to describe the type of io operation in progress - */ -enum raid_io_stage { - RAID_NONE = 0x0, - RAID_READ_DONE = 0x1, - RAID_WRITE_DONE = 0x2, - RAID_PREWRITE_DONE = 0x4, - RAID_WRITE_PONLY_DONE = 0x8, - RAID_WRITE_DONLY_DONE = 0x10, - RAID_LINE_PWDONE = 0x20 -}; - -typedef struct md_raidcbuf { - DAEMON_QUEUE - uint_t cbuf_magic; - struct md_raidcbuf *cbuf_next; /* 0x10 */ - mr_unit_t *cbuf_un; - md_raidps_t *cbuf_ps; - int cbuf_column; - size_t cbuf_bcount; /* 0x20 */ - caddr_t cbuf_buffer; - int cbuf_sum; - int cbuf_pwslot; - int cbuf_pwcnt; /* 0x30 */ - int cbuf_flags; - buf_t cbuf_bp; - uint_t cbuf_pad[4]; -} md_raidcbuf_t; -#define CBUF_PW_INVALIDATE (0x00000001) -#define CBUF_WRITE (0x00000002) - -typedef struct md_raidcs { - DAEMON_QUEUE - uint_t cs_magic; - minor_t cs_mdunit; - mr_unit_t *cs_un; - int cs_flags; - md_raidps_t *cs_ps; - diskaddr_t cs_line; - void (*cs_call)(); - void (*cs_error_call)(); - void (*cs_retry_call)(); - struct md_raidcs *cs_linlck_next; - struct md_raidcs *cs_linlck_prev; - long long cs_pwid; - int cs_dcolumn; - int cs_dpwslot; - uint_t cs_dflags; - int cs_pcolumn; - int cs_ppwslot; - uint_t cs_pflags; - size_t cs_bcount; - uint_t cs_blkcnt; - diskaddr_t cs_blkno; - diskaddr_t cs_lastblk; - int cs_loop; - caddr_t cs_addr; /* base address of io */ - off_t cs_offset; /* offset into the base */ - caddr_t cs_dbuffer; - caddr_t cs_pbuffer; - int cs_frags; - int cs_strategy_flag; - void *cs_strategy_private; - md_raidcbuf_t *cs_buflist; - int cs_error; - int cs_resync_check; - int cs_rstate; - enum raid_io_stage cs_stage; /* current io stage */ - md_raidcbuf_t *cs_pw_inval_list; - - kmutex_t cs_mx; - - buf_t cs_pbuf; - uint_t cs_pad1; - buf_t cs_hbuf; - uint_t cs_pad2; - /* Add new structure members HERE!! */ - buf_t cs_dbuf; - /* DO NOT add struture members here; cs_dbuf is dynamically sized */ -} md_raidcs_t; - -/* value definitions for cs_resync_check */ -#define RCL_OKAY 0x01 /* write to both orig and alt */ -#define RCL_ERRED 0x08 /* treat column as rcs_ERRED */ - -#define RCL_DATA_MASK 0x000000ff -#define RCL_PARITY_MASK 0x0000ff00 -#define RCL_PARITY_OFFSET 8 /* insure masks match offset */ - -#define RCL_PARITY(value) (((value) & RCL_PARITY_MASK) >> \ - RCL_PARITY_OFFSET) - -#define RCL_DATA(value) ((value) & RCL_DATA_MASK) - -/* value definitions for cs_flags */ -#define MD_RCS_ISCALL 0x000001 /* call cs_call in interrupt */ -#define MD_RCS_UNDBUF 0x000002 /* holding unit data buffer */ -#define MD_RCS_UNPBUF 0x000004 /* holding unit parity buffer */ -#define MD_RCS_MPBUF 0x000008 -#define MD_RCS_HAVE_PW_SLOTS 0x000010 /* pw slots gotten */ -#define MD_RCS_PWDONE 0x000040 /* pwfrags are decremented */ -#define MD_RCS_READER 0x000100 /* reader line lock needed */ -#define MD_RCS_WRITER 0x000200 /* writer line lock needed */ -#define MD_RCS_LLOCKD 0x000400 /* line lock held */ -#define MD_RCS_WAITING 0x000800 /* line lock waiting */ -#define MD_RCS_LINE 0x001000 /* full line write */ -#define MD_RCS_ERROR 0x010000 /* I/O error on this child */ -#define MD_RCS_RECOVERY 0x020000 - -/* value definitions for cs_pflags or cs_dflags */ -#define MD_RCS_ISUP 0x0002 - -/* value definitions for gcs_flags */ -#define MD_RGCS_ALLOCBUF 0x0001 -/* returned value from raid_replay() */ -#define RAID_RPLY_SUCCESS 0x0000 -#define RAID_RPLY_ALLOCFAIL 0x0001 -#define RAID_RPLY_COMPREPLAY 0x0002 -#define RAID_RPLY_READONLY 0x0004 -#define RAID_RPLY_EIO 0x0008 - -typedef struct raid_rplybuf { - caddr_t rpl_data; - buf_t *rpl_buf; -} raid_rplybuf_t; - -typedef struct raid_rplylst { - struct raid_rplylst *rpl_next; - uint_t rpl_colcnt; - long long rpl_id; - int rpl_column1; - uint_t rpl_slot1; - raid_pwhdr_t rpl_pwhdr1; - int rpl_column2; - uint_t rpl_slot2; - raid_pwhdr_t rpl_pwhdr2; -} raid_rplylst_t; - -/* Externals from raid.c */ -extern int raid_build_incore(void *, int); -extern void reset_raid(mr_unit_t *, minor_t, int); - -/* Externals from raid_ioctl.c */ -extern int md_raid_ioctl(dev_t dev, int cmd, void *data, - int mode, IOLOCK *lockp); - -/* rename named service functions */ -md_ren_svc_t raid_rename_check; -md_ren_svc_t raid_rename_lock; -md_ren_void_svc_t raid_rename_unlock; - - -/* redefinitions of the union shared by resync and init */ -#define un_resync_segment _t_un._resync._t_un_resync_segment -#define un_resync_index _t_un._resync._t_un_resync_index -#define un_resync_line_index _t_un._resync._t_un_resync_line_index - -#define un_grow_tb _t_un._init._t_un_grow_tb -#define un_init_colcnt _t_un._init._t_un_init_colcnt -#define un_init_iocnt _t_un._init._t_un_init_iocnt - -#define MD_RFLAG_NEEDBUF (0x0001) -#define MD_RFLAG_CLEAR (0x0002) -#define MD_RFLAG_KEEP (0x0004) -#define MD_RFLAG_NEEDPW (0x0008) - - -extern void raid_set_state(mr_unit_t *un, int col, - rcs_state_t new_state, int force); -extern int raid_replay(mr_unit_t *un); -extern void raid_commit(mr_unit_t *un, mddb_recid_t *extras); -extern char *raid_unit_state(rus_state_t state); -extern intptr_t raid_hotspares(); -extern void raid_hs_release(hs_cmds_t cmd, mr_unit_t *un, - mddb_recid_t *recids, int hs_index); -extern int raid_internal_open(minor_t mnum, int flag, int otyp, - int oflags); -extern int raid_internal_close(minor_t mnum, int otyp, - int init_pw, int cflags); -extern int raid_build_pwslot(mr_unit_t *unit, int column_index); -extern void raid_free_pwslot(mr_unit_t *unit, int column_index); -extern void release_resync_request(minor_t mnum); -extern int resync_request(minor_t mnum, int column_index, - size_t copysize, md_error_t *ep); -extern int raid_resync_unit(minor_t mnum, md_error_t *ep); -extern void raid_line_reader_lock(md_raidcs_t *cs, - int resync_thread); -extern void raid_line_exit(md_raidcs_t *cs); -extern int raid_state_cnt(mr_unit_t *un, rcs_state_t state); -extern int raid_build_pw_reservation(mr_unit_t *un, - int colindex); -extern int init_pw_area(mr_unit_t *un, md_dev64_t dev_to_write, - diskaddr_t pwstart, uint_t col); -extern void init_buf(buf_t *bp, int flags, size_t size); -extern void destroy_buf(buf_t *bp); -extern void reset_buf(buf_t *bp, int flags, size_t size); -extern void md_raid_strategy(buf_t *pb, int flag, void *private); -extern void raid_free_pw_reservation(mr_unit_t *un, - int colindex); -extern void raid_fillin_rpw(mr_unit_t *un, - raid_pwhdr_t *pwhdrp, int col); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_RAID_H */ diff --git a/usr/src/uts/common/sys/lvm/md_rename.h b/usr/src/uts/common/sys/lvm/md_rename.h deleted file mode 100644 index 5df0eebb0f62..000000000000 --- a/usr/src/uts/common/sys/lvm/md_rename.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_RENAME_H -#define _SYS_MD_RENAME_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#ifdef DEBUG -#include -#endif -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * rename/exchange common definitions - */ -#define MD_RENAME_VERSION_OFFLINE (1) /* top-level must be offline */ -#define MD_RENAME_VERSION_ONLINE (2) /* includes offline too */ - -/* - * current protocol only allows offline exchange - */ -#define MD_RENAME_VERSION MD_RENAME_VERSION_OFFLINE - -/* - * The rename (aka. exchange) function is implemented as the - * following set of named services. Many of these are implemented - * generically and only overridden when a specific driver needs - * special care. - */ - -#if defined(_KERNEL) - -#define MDRNM_LIST_URFOLKS "rename svc: list your parents" -#define MDRNM_LIST_URSELF "rename svc: list your self" -#define MDRNM_LIST_URKIDS "rename svc: list your children" - -#define MDRNM_LOCK "rename svc: lock" -#define MDRNM_UNLOCK "rename svc: unlock" -#define MDRNM_CHECK "rename svc: check state" - -/* role swappers */ -#define MDRNM_UPDATE_KIDS "rename svc: parent update children" -#define MDRNM_PARENT_UPDATE_TO "rename svc: parent update to" -#define MDRNM_SELF_UPDATE_FROM_UP "rename svc: self update from up" -#define MDRNM_UPDATE_SELF "rename svc: self update self" -#define MDRNM_SELF_UPDATE_FROM_DOWN "rename svc: self update from down" -#define MDRNM_CHILD_UPDATE_TO "rename svc: child update to" -#define MDRNM_UPDATE_FOLKS "rename svc: child update parents" - -typedef enum md_rename_role_t { - MDRR_UNK = 0, - MDRR_PARENT = 1, - MDRR_SELF = 2, - MDRR_CHILD = 3, - MDRR_NROLES = MDRR_CHILD -} md_renrole_t; - -typedef struct md_rendelta_status { - uint_t spare_beg :1; - uint_t locked :1; - uint_t checked :1; - uint_t role_swapped :1; - uint_t unlocked :1; - uint_t spacer :2; - uint_t is_open :1; - uint_t spare_end; -} md_rendstat_t; - -typedef struct md_rentxn_status { - uint_t spare_beg :1; - uint_t trans_in_stack :1; - uint_t spare_end; -} md_rentstat_t; - -typedef struct md_rename_transaction { - u_longlong_t beginning; - md_error_t mde; - md_renop_t op; - int revision; - uint_t uflags; - int rec_idx; - mddb_recid_t *recids; - int n_recids; - md_rentstat_t stat; - - struct md_rename_txn_unit_state { - u_longlong_t beginning; - minor_t mnum; - mdi_unit_t *uip; - md_unit_t *unp; - key_t key; - kstat_t *kstatp; - u_longlong_t end; - - } from, to; - u_longlong_t end; -} md_rentxn_t; - -typedef struct md_rendelta md_rendelta_t; - -typedef void md_ren_void_svc_t(md_rendelta_t *, md_rentxn_t *); -typedef intptr_t md_ren_svc_t(md_rendelta_t *, md_rentxn_t *); -typedef int md_ren_list_svc_t(md_rendelta_t **, md_rentxn_t *); - -typedef md_ren_void_svc_t md_ren_roleswap_svc_t; - -struct md_rendelta { - u_longlong_t beginning; - md_rendelta_t *next, *prev; - md_dev64_t dev; - md_renrole_t old_role, new_role; - md_unit_t *unp; - mdi_unit_t *uip; - - md_ren_svc_t *lock; - md_ren_void_svc_t *unlock; - md_ren_svc_t *check; - md_ren_roleswap_svc_t *role_swap; - - md_rendstat_t txn_stat; - u_longlong_t end; -}; - -/* Externals from md_rename.c */ -extern int md_rename(md_rename_t *, IOLOCK *); -extern md_rendelta_t *md_build_rendelta(md_renrole_t, md_renrole_t, - md_dev64_t, md_rendelta_t *, md_unit_t *, mdi_unit_t *, md_error_t *); -extern void md_store_recid(int *, mddb_recid_t *, md_unit_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_RENAME_H */ diff --git a/usr/src/uts/common/sys/lvm/md_sp.h b/usr/src/uts/common/sys/lvm/md_sp.h deleted file mode 100644 index 5aa3547b24d7..000000000000 --- a/usr/src/uts/common/sys/lvm/md_sp.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS__MD_SP_H -#define _SYS__MD_SP_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define META_SP_DEBUG ("META_SP_DEBUG") - -/* on-disk structures */ -#define MD_SP_MAGIC (0x20000127) -/* number of sectors to reserve at the beginning of the volume */ -#define MD_SP_START (0) -/* current watermark version number */ -#define MD_SP_VERSION (1) -/* size of a watermark in sectors */ -#define MD_SP_WMSIZE (1) -/* free watermark name */ -#define MD_SP_FREEWMNAME "free" -/* local set name */ -#define MD_SP_LOCALSETNAME "" -/* maximum length of a soft partition metadevice name. eg. dXXXX\0 */ -#define MD_SP_MAX_DEVNAME_PLUS_1 (6) - -/* - * The size of this structure is forced to be 512 bytes (ie a sector) by - * using a union. Note the MD_MAX_SETNAME_PLUS_1 is set in meta_basic.h - */ - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef union mp_watermark { - struct { - uint32_t wm_magic; /* magic number */ - uint32_t wm_version; /* version number */ - uint32_t wm_checksum; /* structure checksum */ - uint32_t wm_seq; /* sequence number */ - uint32_t wm_type; /* extent type */ - uint64_t wm_length; /* length of extent */ - char wm_mdname[MD_MAX_SETNAME_PLUS_1 + - MD_SP_MAX_DEVNAME_PLUS_1]; /* SP name */ - char wm_setname[MD_MAX_SETNAME_PLUS_1]; /* setname */ - } wm; - uchar_t wm_pad[MD_SP_WMSIZE * DEV_BSIZE]; -} mp_watermark_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#define wm_magic wm.wm_magic -#define wm_version wm.wm_version -#define wm_checksum wm.wm_checksum -#define wm_seq wm.wm_seq -#define wm_type wm.wm_type -#define wm_length wm.wm_length -#define wm_mdname wm.wm_mdname -#define wm_setname wm.wm_setname - -/* Watermark types */ -typedef enum sp_ext_type { - EXTTYP_ALLOC = 0x1, /* this extent is in use by a soft partition */ - EXTTYP_FREE = 0x2, /* extent is not in use */ - EXTTYP_END = 0x3, /* last descriptor on the volume */ - EXTTYP_RESERVED = 0x4 /* extent will not be used or updated */ -} sp_ext_type_t; - -/* ioctls */ -#define MD_IOC_SPSTATUS (MDIOC_MISC|0) -#define MD_IOC_SPUPDATEWM (MDIOC_MISC|1) -#define MD_IOC_SPREADWM (MDIOC_MISC|2) -#define MD_MN_IOC_SPUPDATEWM (MDIOC_MISC|3) - -#ifdef _KERNEL - -/* - * parent and child save areas provide the mechanism for tracking - * I/O operations in the metadevice stack. - */ - -/* soft partitioning parent save area */ -typedef struct md_spps { /* soft partition parent save */ - DAEMON_QUEUE - mp_unit_t *ps_un; /* sp unit structure */ - mdi_unit_t *ps_ui; /* incore unit struct */ - buf_t *ps_bp; /* parent buffer */ - caddr_t ps_addr; - int ps_frags; - int ps_flags; - /* - * New structure members should be added here; fields added - * after ps_mx will not be zeroed during initialization. - */ - kmutex_t ps_mx; -} md_spps_t; - -/* parent save flags. */ -#define MD_SPPS_ERROR 0x0001 -#define MD_SPPS_DONTFREE 0x0002 -#define MD_SPPS_DONE 0x0004 - -/* soft partitioning child save area */ -typedef struct md_spcs { - DAEMON_QUEUE - minor_t cs_mdunit; /* child minor number */ - md_spps_t *cs_ps; /* parent save pointer */ - /* Add new structure members HERE!! */ - buf_t cs_buf; /* child buffer */ - /* DO NOT add struture members here; cs_buf is dynamically sized */ -} md_spcs_t; - -#define SPPS_FREE(kc, ps) \ -{ \ - if ((ps)->ps_flags & MD_SPPS_DONTFREE) \ - (ps)->ps_flags |= MD_SPPS_DONE; \ - else \ - kmem_cache_free((kc), (ps)); \ -} - -/* externals from sp.c */ -extern int sp_build_incore(void *, int); -extern void reset_sp(mp_unit_t *, minor_t, int); -extern int sp_directed_read(minor_t, vol_directed_rd_t *, int); - -/* externals from sp_ioctl.c */ -extern int md_sp_ioctl(dev_t dev, int cmd, void *data, - int mode, IOLOCK *lockp); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS__MD_SP_H */ diff --git a/usr/src/uts/common/sys/lvm/md_stripe.h b/usr/src/uts/common/sys/lvm/md_stripe.h deleted file mode 100644 index 1db7fda264c1..000000000000 --- a/usr/src/uts/common/sys/lvm/md_stripe.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS__MD_STRIPE_H -#define _SYS__MD_STRIPE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * ms_comp32_od is for old 32 bit format only - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct ms_comp32_od { - mdkey_t un_key; - dev32_t un_dev; - daddr32_t un_start_block; /* comp start blkno */ - md_m_shared32_od_t un_mirror; -} ms_comp32_od_t; - - -typedef struct ms_comp { /* components */ - mdkey_t un_key; /* namespace key */ - md_dev64_t un_dev; /* device number, 64 bit */ - diskaddr_t un_start_block; /* comp start blkno */ - md_m_shared_t un_mirror; /* mirror shared data */ -} ms_comp_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* - * ms_unit32_od is for old 32 bit format only - */ -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct ms_unit32_od { - mdc_unit32_od_t c; - int un_hsp_id; /* hot spare pool db record id */ - uint_t un_nrows; /* number of rows */ - uint_t un_ocomp; /* offset of ms_comp array */ - struct ms_row32_od { - int un_icomp; /* ms_comp array index of first comp */ - uint_t un_ncomp; /* # comps in this row */ - int un_blocks; /* total blocks in this row */ - int un_cum_blocks; /* cum. blks in this and prev. rows */ - int un_interlace; /* # blks from each disk in a stripe */ - }un_row[1]; -} ms_unit32_od_t; - - -typedef struct ms_unit { - mdc_unit_t c; - int un_hsp_id; /* hot spare pool db record id */ - uint_t un_nrows; /* number of rows */ - uint_t un_ocomp; /* offset of ms_comp array */ - struct ms_row { - int un_icomp; /* ms_comp array index of first comp */ - uint_t un_ncomp; /* # comps in this row */ - diskaddr_t un_blocks; /* total blocks in this row */ - diskaddr_t un_cum_blocks; /* cum. blks in rows */ - diskaddr_t un_interlace; /* # blks from each disk */ - }un_row[1]; -} ms_unit_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#ifdef _KERNEL - -typedef struct md_sps { /* stripe parent save */ - DAEMON_QUEUE - ms_unit_t *ps_un; - mdi_unit_t *ps_ui; - buf_t *ps_bp; - caddr_t ps_addr; - int ps_frags; - int ps_flags; - ms_comp_t *ps_errcomp; - /* - * New structure members should be added here; fields added - * after ps_mx will not be zeroed during initialization. - */ - kmutex_t ps_mx; -} md_sps_t; - -#define MD_SPS_ERROR 0x0001 -#define MD_SPS_DONTFREE 0x0002 -#define MD_SPS_DONE 0x0004 - -#define SPS_FREE(kc, ps) \ -{ \ - if ((ps)->ps_flags & MD_SPS_DONTFREE) \ - (ps)->ps_flags |= MD_SPS_DONE; \ - else \ - kmem_cache_free((kc), (ps)); \ -} - -typedef struct md_scs { - DAEMON_QUEUE - minor_t cs_mdunit; - md_sps_t *cs_ps; - ms_comp_t *cs_comp; - /* Add new structure members HERE!! */ - buf_t cs_buf; - /* DO NOT add struture members here; cs_buf is dynamically sized */ -} md_scs_t; - -/* Externals from stripe.c */ -extern int stripe_build_incore(void *, int); -extern void reset_stripe(ms_unit_t *, minor_t, int); -extern intptr_t stripe_component_count(md_dev64_t, void *); -extern intptr_t stripe_get_dev(md_dev64_t, void *, int, ms_cd_info_t *); -extern intptr_t stripe_replace_dev(md_dev64_t, void *, int, ms_new_dev_t *, - mddb_recid_t *, int, void (**)(), void **); -extern void stripe_replace_done(md_dev64_t, sv_dev_t *); - -/* Externals from stripe_ioctl.c */ -extern int md_stripe_ioctl(dev_t dev, int cmd, void *data, - int mode, IOLOCK *lockp); - -/* rename named service functions (stripe_ioctl.c) */ -md_ren_svc_t stripe_rename_check; - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS__MD_STRIPE_H */ diff --git a/usr/src/uts/common/sys/lvm/md_trans.h b/usr/src/uts/common/sys/lvm/md_trans.h deleted file mode 100644 index 321f1d4d90ea..000000000000 --- a/usr/src/uts/common/sys/lvm/md_trans.h +++ /dev/null @@ -1,973 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MD_TRANS_H -#define _SYS_MD_TRANS_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define LDL_META_SBLK (16) - -#define LDL_MINLOGSIZE (1024*1024) -#define LDL_MAXLOGSIZE (1024*1024*1024) -#define LDL_MINBUFSIZE (32*1024) -#define LDL_USABLE_BSIZE (DEV_BSIZE - sizeof (sect_trailer_t)) -#define NB_LEFT_IN_SECTOR(off) (LDL_USABLE_BSIZE - ((off) - dbtob(btodb(off)))) - -typedef struct cirbuf32 { - caddr32_t xx_cb_bp; /* buf's with space in circular buf */ - caddr32_t xx_cb_dirty; /* filling this buffer for log write */ - caddr32_t xx_cb_free; /* free bufs list */ - caddr32_t xx_cb_va; /* address of circular buffer */ - uint_t xx_cb_nb; /* size of circular buffer */ - uint_t xx_cb_rwlock[3]; /* r/w lock to protect list mgmt. */ -} cirbuf32_t; - -typedef struct cirbuf_ic { - buf_t *cb_bp; /* buf's with space in circular buf */ - buf_t *cb_dirty; /* filling this buffer for log write */ - buf_t *cb_free; /* free bufs list */ - caddr_t cb_va; /* address of circular buffer */ - size_t cb_nb; /* size of circular buffer */ - md_krwlock_t cb_rwlock; /* r/w lock to protect list mgmt. */ -} cirbuf_ic_t; - - -typedef struct ml_unit { - uint_t un_revision; /* revision number */ - /* - * mdd infrastructure stuff - */ - mddb_recid_t un_recid; /* db record id */ - mdkey_t un_key; /* namespace key */ - md_dev64_t un_dev; /* device number */ - uint_t un_opencnt; /* open count */ - - /* - * metatrans infrastructure stuff - */ - uint_t un_transcnt; /* #open metatrans devices */ - - /* - * log specific stuff - */ - off32_t un_head_lof; /* byte offset of head */ - uint_t un_head_ident; /* head sector id # */ - off32_t un_tail_lof; /* byte offset of tail */ - uint_t un_tail_ident; /* tail sector id # */ - off32_t un_bol_lof; /* byte offset of begin of log */ - off32_t un_eol_lof; /* byte offset of end of log */ - daddr32_t un_nblks; /* total blocks of log space */ - daddr32_t un_tblks; /* total blocks in log device */ - uint_t un_maxtransfer; /* max transfer in bytes */ - uint_t un_status; /* status bits */ - uint_t un_maxresv; /* maximum reservable space */ - daddr32_t un_pwsblk; /* block number of prewrite area */ - ulong_t un_devbsize; /* device bsize */ - uint_t un_resv; /* reserved byte count for this trans */ - uint_t un_resv_wantin; /* reserved byte count for next trans */ - mt_l_error_t un_error; /* error state */ - uint_t un_tid; /* used during logscan */ - uint_t un_head_tid; /* used for logscan; set at sethead */ - struct timeval32 un_timestamp; /* time of last state change */ - /* - * spares - */ - uint_t un_spare[16]; - /* - * following are incore only elements. - * Incore elements must always be at the end - * of this data struture. - */ - struct ml_unit *un_next; - struct mt_unit *un_utlist; - struct mt_map *un_logmap; - cirbuf_ic_t un_rdbuf; - cirbuf_ic_t un_wrbuf; - kmutex_t un_log_mutex; -} ml_unit_t; - - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct ml_unit32_od { - uint_t un_revision; /* revision number */ - /* - * mdd infrastructure stuff - */ - caddr32_t xx_un_next; /* next log unit struct */ - mddb_recid_t un_recid; /* db record id */ - mdkey_t un_key; /* namespace key */ - dev32_t un_dev; /* device number */ - uint_t un_opencnt; /* open count */ - - /* - * metatrans infrastructure stuff - */ - uint_t un_transcnt; /* #open metatrans devices */ - caddr32_t xx_un_utlist; /* list of metatrans devices */ - caddr32_t xx_un_logmap; /* address of logmap */ - - /* - * log specific stuff - */ - off32_t un_head_lof; /* byte offset of head */ - uint_t un_head_ident; /* head sector id # */ - off32_t un_tail_lof; /* byte offset of tail */ - uint_t un_tail_ident; /* tail sector id # */ - off32_t un_bol_lof; /* byte offset of begin of log */ - off32_t un_eol_lof; /* byte offset of end of log */ - daddr32_t un_nblks; /* total blocks of log space */ - daddr32_t un_tblks; /* total blocks in log device */ - uint_t un_maxtransfer; /* max transfer in bytes */ - uint_t un_status; /* status bits */ - uint_t un_maxresv; /* maximum reservable space */ - daddr32_t un_pwsblk; /* block number of prewrite area */ - uint_t un_devbsize; /* device bsize */ - uint_t un_resv; /* reserved byte count for this trans */ - uint_t un_resv_wantin; /* reserved byte count for next trans */ - mt_l_error_t un_error; /* error state */ - uint_t un_tid; /* used during logscan */ - uint_t un_head_tid; /* used for logscan; set at sethead */ - cirbuf32_t xx_un_rdbuf; /* read buffer space */ - cirbuf32_t xx_un_wrbuf; /* write buffer space */ - int xx_un_log_mutex[2]; /* allows one log write at a time */ - struct timeval32 un_timestamp; /* time of last state change */ - /* - * spares - */ - uint_t un_spare[16]; -} ml_unit32_od_t; -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - - - -#define ML_UNIT_ONDSZ ((size_t)((caddr_t)&((ml_unit_t *)0)->un_spare[15] +\ - sizeof (uint_t))) - - -/* - * un_status - */ -#define LDL_BEING_RESET 0x0001 /* delete the log record at snarf */ -#define LDL_FIND_TAIL 0x0002 /* find tail of the log */ -#define LDL_SCAN_ACTIVE 0x0004 /* log scan in progress */ -#define LDL_METADEVICE 0x0008 /* underlying device is metadevice */ -#define LDL_PWVALID 0x0010 /* prewrite area is valid */ -#define LDL_INFO 0x0020 /* prewrite state is valid */ - -typedef struct sect_trailer { - uint_t st_tid; /* transaction id */ - uint_t st_ident; /* unique sector id */ -} sect_trailer_t; - - -/* - * ioctls - */ -#define MD_IOCGET_LOG (MDIOC_MISC|0) -#define MD_IOC_DEBUG (MDIOC_MISC|4) -#define MD_IOCGET_TRANSSTATS (MDIOC_MISC|5) -#define MD_IOC_TSD (MDIOC_MISC|6) -#define MD_IOC_TRYGETBLK (MDIOC_MISC|7) -#define MD_IOC_TRYPAGE (MDIOC_MISC|8) -#define MD_IOC_SETSHADOW (MDIOC_MISC|11) -#define MD_IOC_INJECTERRORS (MDIOC_MISC|13) -#define MD_IOC_STOPERRORS (MDIOC_MISC|14) -#define MD_IOC_UFSERROR (MDIOC_MISC|15) -#define MD_IOC_ISDEBUG (MDIOC_MISC|17) - -#define MD_IOC_TRANS_DETACH (MDIOC_MISC|32) - -/* - * following bits are used in status word in the common section - * of unit structure - */ -#define MD_UN_LOG_DELETED (0x00010000) /* don't need to del @snarf */ - -/* - * map block - */ -#define MAPBLOCKSIZE (8192) -#define MAPBLOCKSHIFT (13) -#define MAPBLOCKOFF (MAPBLOCKSIZE-1) -#define MAPBLOCKMASK (~MAPBLOCKOFF) - -/* - * delta header - */ -struct delta { - offset_t d_mof; - off32_t d_nb; - dev32_t d_dev; - delta_t d_typ; -}; - -/* - * common map entry - */ -typedef struct mapentry mapentry_t; -struct mapentry { - /* - * doubly linked list of all mapentries in map -- MUST BE FIRST - */ - mapentry_t *me_next; - mapentry_t *me_prev; - - mapentry_t *me_hash; - mapentry_t *me_agenext; - mapentry_t *me_cancel; - int (*me_func)(); - uintptr_t me_arg; - off_t me_lof; - uint_t me_flags; - uint_t me_tid; - uint_t me_age; - struct delta me_delta; -}; - -#define me_mof me_delta.d_mof -#define me_nb me_delta.d_nb -#define me_dt me_delta.d_typ -#define me_dev me_delta.d_dev - -/* - * me_flags - */ -#define ME_FREE (0x0001) /* on free list */ -#define ME_HASH (0x0002) /* on hash list */ -#define ME_CANCEL (0x0004) /* on cancel list */ -#define ME_AGE (0x0008) /* on age list */ -#define ME_LIST (0x0010) /* on list list */ -#define ME_ROLL (0x0020) /* on pseudo-roll list */ - -/* - * TRANSACTION OPS STATS - * mt_top_size_* should be 64bit but that would - * require test recompilations. It does not hurt the kernel - * so leave as 32 bit for now. - */ -struct topstats { - uint_t mtm_top_num[TOP_MAX]; - uint_t mtm_top_size_etot[TOP_MAX]; - uint_t mtm_top_size_rtot[TOP_MAX]; - uint_t mtm_top_size_max[TOP_MAX]; - uint_t mtm_top_size_min[TOP_MAX]; - uint_t mtm_delta_num[DT_MAX]; -}; - -/* - * MAP STATS (global struct that is not updated if compiled w/o ASSERTs) - * some members of transstats need to be 64bit. See the comment above. - */ -struct transstats { - /* trans.c */ - uint_t ts_trans_zalloc; - uint_t ts_trans_zalloc_nosleep; - uint_t ts_trans_alloc; - uint_t ts_trans_alloc_nosleep; - uint_t ts_trans_free; - uint_t ts_trans_alloced; - uint_t ts_trans_freed; - uint_t ts_trans_write; - uint_t ts_trans_write_roll; - - /* trans_delta.c */ - uint_t ts_mapentry_alloc; - uint_t ts_mapentry_alloc_list; - uint_t ts_mapentry_free; - - uint_t ts_delta_add; - uint_t ts_delta_add_scan; - uint_t ts_delta_add_hit; - - uint_t ts_delta_remove; - uint_t ts_delta_remove_scan; - uint_t ts_delta_remove_hit; - - uint_t ts_delta_del; - uint_t ts_delta_del_scan; - - uint_t ts_delta_push; - - uint_t ts_overlap; - uint_t ts_overlap_scan; - uint_t ts_overlap_hit; - - uint_t ts_remove_roll; - uint_t ts_remove_roll_scan; - uint_t ts_remove_roll_hit; - uint_t ts_remove_roll_dolock; - uint_t ts_remove_roll_sud; - - uint_t ts_next_roll; - uint_t ts_next_roll_scan; - uint_t ts_next_roll_hit; - - uint_t ts_list_age; - uint_t ts_list_age_scan; - - uint_t ts_list_get; - uint_t ts_list_get_scan; - uint_t ts_list_get_hit; - uint_t ts_list_get_again; - - uint_t ts_list_put; - uint_t ts_list_put_scan; - - uint_t ts_read_mstr; - - uint_t ts_logmap_secmap_roll; - - uint_t ts_read_log; - - uint_t ts_logmap_abort; - uint_t ts_logmap_abort_hit; - - uint_t ts_list_add; - uint_t ts_list_add_scan; - uint_t ts_list_add_cancel; - uint_t ts_list_add_unhash; - - uint_t ts_free_cancel; - uint_t ts_free_cancel_again; - uint_t ts_free_cancel_scan; - uint_t ts_free_cancel_hit; - - uint_t ts_commit; - uint_t ts_commit_hit; - - uint_t ts_logmap_roll_dev; - uint_t ts_logmap_roll_dev_scan; - uint_t ts_logmap_roll_dev_hit; - - uint_t ts_logmap_roll_sud; - uint_t ts_logmap_roll_sud_hit; - - uint_t ts_logmap_ud_done; - uint_t ts_logmap_ud_done_scan; - - uint_t ts_logmap_ud_wait; - uint_t ts_logmap_ud_wait_hit; - - uint_t ts_logmap_ud_commit; - uint_t ts_logmap_ud_commit_scan; - - uint_t ts_logmap_cancel; - uint_t ts_logmap_cancel_scan; - uint_t ts_logmap_cancel_hit; - - uint_t ts_logmap_iscancel; - uint_t ts_logmap_iscancel_scan; - uint_t ts_logmap_iscancel_hit; - - uint_t ts_logscan; - uint_t ts_logscan_ud; - uint_t ts_logscan_delta; - uint_t ts_logscan_cancel; - uint_t ts_logscan_commit; - - /* trans_thread.c */ - uint_t ts_prewrite; - uint_t ts_prewrite_read; - uint_t ts_prewrite_write; - uint_t ts_trans_roll; - uint_t ts_trans_roll_wait; - uint_t ts_trans_roll_wait_nada; - uint_t ts_trans_roll_wait_slow; - uint_t ts_trans_roll_force; - uint_t ts_trans_roll_nsud; - uint_t ts_trans_roll_ref; - uint_t ts_trans_roll_full; - uint_t ts_trans_roll_logmap; - uint_t ts_trans_roll_read; - uint_t ts_trans_roll_reread; - uint_t ts_trans_roll_wait_inuse; - uint_t ts_trans_roll_prewrite; - uint_t ts_trans_roll_write; - - /* trans_top.c */ - uint_t ts_delta; - uint_t ts_ud_delta; - uint_t ts_ud_delta_log; - uint_t ts_cancel; - uint_t ts_iscancel; - uint_t ts_error; - uint_t ts_iserror; - uint_t ts_beginsync; - uint_t ts_active; - uint_t ts_activesync; - uint_t ts_beginasync; - uint_t ts_endsync; - uint_t ts_wantin; - uint_t ts_endasync; - uint_t ts_read; - uint_t ts_read_roll; - uint_t ts_readmt; - uint_t ts_write; - uint_t ts_writemt; - uint_t ts_writemt_done; - uint_t ts_log; - - /* trans_log.c */ - uint_t ts_logcommitdb; - - uint_t ts_push_dirty_bp; - uint_t ts_push_dirty_bp_extra; - uint_t ts_push_dirty_bp_fail; - - uint_t ts_alloc_bp; - uint_t ts_alloc_bp_free; - - uint_t ts_find_bp; - uint_t ts_find_bp_scan; - uint_t ts_find_bp_hit; - - uint_t ts_find_read_lof; - uint_t ts_find_read_lof_scan; - uint_t ts_find_read_lof_hit; - - uint_t ts_get_read_bp; - uint_t ts_get_read_bp_wr; - uint_t ts_get_read_bp_rd; - - uint_t ts_extend_write_bp; - uint_t ts_extend_write_bp_hit; - - uint_t ts_storebuf; - uint_t ts_fetchbuf; - uint_t ts_round_commit; - uint_t ts_push_commit; - - uint_t ts_inval_range; - uint_t ts_inval_range_scan; - uint_t ts_inval_range_hit; - - uint_t ts_writelog; - uint_t ts_writelog_max; - - uint_t ts_readlog; - uint_t ts_readlog_max; - - uint_t ts_get_write_bp; - uint_t ts_get_write_bp_steal; - - uint_t ts_writesync; - uint_t ts_writesync_log; - uint_t ts_writesync_nolog; - - uint_t ts_longmof_cnt; - -} transstats; - -#ifdef DEBUG -#define TRANSSTATS(f) (transstats.f++) -#define TRANSSTATSADD(f, n) (transstats.f += (n)) -#define TRANSSTATSMAX(m, v) \ - if ((v) > transstats.m)\ - transstats.m = (v); -#else -#define TRANSSTATS(f) -#define TRANSSTATSADD(f, n) -#define TRANSSTATSMAX(m, v) -#endif /* DEBUG */ - -/* - * MAP TYPES - */ -enum maptypes { - deltamaptype, udmaptype, logmaptype, matamaptype, shadowmaptype -}; - -/* - * MAP - */ -#define DELTAMAP_NHASH (512) -#define LOGMAP_NHASH (2048) -#define MAP_INDEX(dev, mof, mtm) \ - ((((mof) >> MAPBLOCKSHIFT) + (dev)) & ((mtm)->mtm_nhash-1)) -#define MAP_HASH(dev, mof, mtm) \ - (mtm->mtm_hash + MAP_INDEX(dev, mof, mtm)) - -typedef struct mt_map { - /* - * anchor doubly linked list this map's entries -- MUST BE FIRST - */ - mapentry_t *mtm_next; - mapentry_t *mtm_prev; - - int mtm_flags; /* generic flags */ - int mtm_ref; /* PTE like ref bit */ - uint_t mtm_debug; /* set at create time */ - uint_t mtm_age; /* mono-inc; tags mapentries */ - mapentry_t *mtm_cancel; /* to be canceled at commit */ - uint_t mtm_nhash; /* # of hash anchors */ - mapentry_t **mtm_hash; /* array of singly linked lists */ - struct topstats *mtm_tops; /* trans ops - enabled by an ioctl */ - int mtm_nme; /* # of mapentries */ - int mtm_nmet; /* # of mapentries this transaction */ - int mtm_nud; /* # of active userdata writes */ - int mtm_nsud; /* # of userdata scanned deltas */ - md_dev64_t mtm_dev; /* device identifying map */ - - /* - * the following are protected by the global map_mutex - */ - struct mt_map *mtm_mapnext; /* singly linked list of all maps */ - uint_t mtm_refcnt; /* reference count to this map */ - enum maptypes mtm_type; /* type of map */ - - /* - * used after logscan to set the log's tail - */ - off_t mtm_tail_lof; - size_t mtm_tail_nb; - - /* - * debug field for Scan test - */ - off_t mtm_trimlof; /* log was trimmed to this lof */ - off_t mtm_trimtail; /* tail lof before trimming */ - off_t mtm_trimalof; /* lof of last allocation delta */ - off_t mtm_trimclof; /* lof of last commit delta */ - off_t mtm_trimrlof; /* lof of last rolled delta */ - struct ml_unit *mtm_ul; /* log unit for this map */ - - /* - * moby trans stuff - */ - uint_t mtm_tid; - uint_t mtm_committid; - ushort_t mtm_closed; - ushort_t mtm_seq; - int mtm_wantin; - int mtm_active; - int mtm_activesync; - uint_t mtm_dirty; - kmutex_t mtm_lock; - kcondvar_t mtm_cv_commit; - kcondvar_t mtm_cv_next; - kcondvar_t mtm_cv_eot; - - /* - * mutex that protects all the fields in mt_map except - * mtm_mapnext and mtm_refcnt - */ - kmutex_t mtm_mutex; - kcondvar_t mtm_cv; /* generic conditional */ - - /* - * rw lock for the mapentry fields agenext and locnext - */ - md_krwlock_t mtm_rwlock; - /* - * DEBUG: runtestscan - */ - kmutex_t mtm_scan_mutex; -} mt_map_t; - -/* - * mtm_flags - */ -#define MTM_ROLL_EXIT (0x00000001) -#define MTM_ROLL_RUNNING (0x00000002) -#define MTM_FORCE_ROLL (0x00000004) - -/* - * Generic range checking macros - */ -#define OVERLAP(sof, snb, dof, dnb) \ - ((sof >= dof && sof < (dof + dnb)) || \ - (dof >= sof && dof < (sof + snb))) - -#define WITHIN(sof, snb, dof, dnb) ((sof >= dof) && ((sof+snb) <= (dof+dnb))) - -#define DATAoverlapME(mof, hnb, me) (OVERLAP(mof, hnb, me->me_mof, me->me_nb)) -#define MEwithinDATA(me, mof, hnb) (WITHIN(me->me_mof, me->me_nb, mof, hnb)) -#define DATAwithinME(mof, hnb, me) (WITHIN(mof, hnb, me->me_mof, me->me_nb)) - - -typedef struct mt_unit { - struct mdc_unit c; /* common stuff */ - /* - * infrastructure - */ - mt_flags_t un_flags; - /* - * log and master device - */ - mdkey_t un_m_key; - md_dev64_t un_m_dev; - mdkey_t un_l_key; - md_dev64_t un_l_dev; - daddr32_t un_l_sblk; /* start block */ - daddr32_t un_l_pwsblk; /* prewrite start block */ - daddr32_t un_l_nblks; /* # of usable log blocks */ - daddr32_t un_l_tblks; /* total log blocks */ - daddr32_t un_l_head; /* sector offset of log head */ - daddr32_t un_l_tail; /* sector offset of log tail */ - uint_t un_l_resv; /* current log reservations */ - uint_t un_l_maxresv; /* max log reservations */ - uint_t un_l_maxtransfer; /* maximum transfer at init */ - mddb_recid_t un_l_recid; /* database id */ - mt_l_error_t un_l_error; /* error state */ - struct timeval32 un_l_timestamp; /* time of last log state chg */ - md_dev64_t un_s_dev; /* shadow device for testing only */ - mt_debug_t un_debug; /* debug flags; set at create */ - md_dev64_t un_dev; /* this metatrans device */ - int un_logreset; /* part of _FIOLOGRESET ioctl stuff */ - struct timeval32 un_timestamp; /* time of last trans state change */ - /* - * spares - */ - ulong_t un_spare[16]; - /* - * following are incore only elements. - * Incore elements must always be at the end - * of this data struture. - */ - struct mt_unit *un_next; - struct ml_unit *un_l_unit; - struct ufstrans *un_ut; - mt_map_t *un_deltamap; - mt_map_t *un_udmap; - mt_map_t *un_logmap; - mt_map_t *un_matamap; - mt_map_t *un_shadowmap; -} mt_unit_t; - - -typedef struct mt_unit32_od { - mdc_unit32_od_t c; /* common stuff */ - /* - * infrastructure - */ - mt_flags_t un_flags; - caddr32_t xx_un_next; /* anchored in log unit */ - /* - * log and master device - */ - mdkey_t un_m_key; - dev32_t un_m_dev; - mdkey_t un_l_key; - dev32_t un_l_dev; - daddr32_t un_l_sblk; /* start block */ - daddr32_t un_l_pwsblk; /* prewrite start block */ - daddr32_t un_l_nblks; /* # of usable log blocks */ - daddr32_t un_l_tblks; /* total log blocks */ - daddr32_t un_l_head; /* sector offset of log head */ - daddr32_t un_l_tail; /* sector offset of log tail */ - uint_t un_l_resv; /* current log reservations */ - uint_t un_l_maxresv; /* max log reservations */ - uint_t un_l_maxtransfer; /* maximum transfer at init */ - mddb_recid_t un_l_recid; /* database id */ - caddr32_t xx_un_l_unit; /* log device unit struct */ - mt_l_error_t un_l_error; /* error state */ - struct timeval32 un_l_timestamp; /* time of last log state chg */ - dev32_t un_s_dev; /* shadow device for testing only */ - - mt_debug_t un_debug; /* debug flags; set at create */ - caddr32_t xx_un_ut; /* ufstrans struct */ - dev32_t un_dev; /* this metatrans device */ - caddr32_t xx_un_deltamap; /* deltamap */ - caddr32_t xx_un_udmap; /* userdata map */ - caddr32_t xx_un_logmap; /* logmap includes moby trans stuff */ - caddr32_t xx_un_matamap; /* optional - matamap */ - caddr32_t xx_un_shadowmap; /* optional - shadowmap */ - int un_logreset; /* part of _FIOLOGRESET ioctl stuff */ - struct timeval32 un_timestamp; /* time of last trans state change */ - /* - * spares - */ - uint_t un_spare[16]; -} mt_unit32_od_t; - -/* - * prewrite info (per buf); stored as array at beginning of prewrite area - */ -struct prewrite { - int pw_bufsize; /* every buffer is this size */ - daddr32_t pw_blkno; /* block number */ - dev32_t pw_dev; /* device to write to */ - ushort_t pw_secmap; /* bitmap */ - /* 1's write this sector in the buf */ - ushort_t pw_flags; -}; -/* - * pw_flags - */ -#define PW_INUSE 0x0001 /* this prewrite buf is in use */ -#define PW_WAIT 0x0002 /* write in progress; wait for completion */ -#define PW_REM 0x0004 /* remove deltas */ - -/* - * log state - */ -struct logstate { - off32_t ls_head_lof; /* log head */ - uint_t ls_head_ident; /* log head ident */ - uint_t ls_head_tid; /* log head tid */ - uint_t ls_chksum; /* checksum of structure */ - off32_t ls_bol_lof; /* needed for TS_Tools/dumplog.c */ - off32_t ls_eol_lof; /* needed for TS_Tools/dumplog.c */ - uint_t ls_maxtransfer; /* needed for TS_Tools/dumplog.c */ - daddr32_t ls_pwsblk; /* needed for TS_Tools/dumplog.c */ -}; - -/* - * log state defines - */ -#define LS_SECTORS (2) /* number of sectors used by state area */ - -/* - * un_debug - * MT_TRANSACT - keep per thread accounting of tranactions - * MT_MATAMAP - double check deltas and ops against matamap - * MT_WRITE_CHECK - check master+deltas against metadata write - * MT_LOG_WRITE_CHECK - read after write for log writes - * MT_CHECK_MAP - check map after every insert/delete - * MT_TRACE - trace transactions (used with MT_TRANSACT) - * MT_SIZE - fail on size errors (used with MT_TRANSACT) - * MT_NOASYNC - force every op to be sync - * MT_FORCEROLL - forcibly roll the log after every commit - * MT_SCAN - running runtestscan; special case as needed - * MT_SHADOW - copy metatrans device writes to shadow dev. - * MT_PREWRITE - process prewrite area every roll - */ -#define MT_TRANSACT (0x00000001) -#define MT_MATAMAP (0x00000002) -#define MT_WRITE_CHECK (0x00000004) -#define MT_LOG_WRITE_CHECK (0x00000008) -#define MT_CHECK_MAP (0x00000010) -#define MT_TRACE (0x00000020) -#define MT_SIZE (0x00000040) -#define MT_NOASYNC (0x00000080) -#define MT_FORCEROLL (0x00000100) -#define MT_SCAN (0x00000200) -#define MT_SHADOW (0x00000400) -#define MT_PREWRITE (0x00000800) - -/* Type 2 trans records */ -#define TRANS_REC 1 -#define LOG_REC 2 - -#ifdef _KERNEL - -typedef struct md_tps { /* trans parent save */ - DAEMON_QUEUE - struct mt_unit *ps_un; - mdi_unit_t *ps_ui; - buf_t *ps_bp; - size_t ps_count; /* Used for testing only. */ - kmutex_t ps_mx; /* protects ps_count. */ -} md_tps_t; - -/* - * Log layer protos -- trans_log.c - */ -extern void _init_ldl(void); -extern void _fini_ldl(void); -extern void md_ldl_round_commit(mt_unit_t *); -extern void md_ldl_push_commit(mt_unit_t *); -extern int md_ldl_need_commit(ml_unit_t *); -extern int md_ldl_has_space(ml_unit_t *, mapentry_t *); -extern void md_ldl_write(mt_unit_t *, caddr_t, offset_t, - mapentry_t *); -extern void md_ldl_waito(ml_unit_t *); -extern int md_ldl_read(ml_unit_t *, caddr_t, offset_t, off_t, - mapentry_t *); -extern void md_ldl_sethead(ml_unit_t *, off_t, uint_t, - struct buf *); -extern void md_ldl_settail(ml_unit_t *, off_t, off_t, - struct buf *); -extern void ldl_setpwvalid(ml_unit_t *); -extern int ldl_build_incore(ml_unit_t *, int); -extern ml_unit_t *ldl_findlog(mddb_recid_t); -extern mddb_recid_t ldl_create(mdkey_t, mt_unit_t *); -extern void ldl_utadd(mt_unit_t *); -extern int ldl_open_dev(mt_unit_t *, ml_unit_t *); -extern void ldl_close_dev(ml_unit_t *); -extern int ldl_snarf(void); -extern void ldl_logscan_seterror(ml_unit_t *); -extern void ldl_logscan_saverror(ml_unit_t *); -extern size_t md_ldl_logscan_nbcommit(off_t); -extern int md_ldl_logscan_read(ml_unit_t *, off_t *, size_t, - caddr_t); -extern void md_ldl_logscan_begin(ml_unit_t *, daddr_t); -extern void md_ldl_logscan_end(ml_unit_t *); -extern int md_ldl_need_roll(ml_unit_t *); -extern int md_ldl_empty(ml_unit_t *); -extern int ldl_pwvalid(ml_unit_t *); -extern void ldl_waitscan(ml_unit_t *); -extern void ldl_errorbp(set_t, buf_t *, char *); -extern void md_ldl_seterror(ml_unit_t *); -extern int ldl_isherror(ml_unit_t *); -extern int ldl_iserror(ml_unit_t *); -extern int ldl_isanyerror(ml_unit_t *); -extern void ldl_start_scan(mt_unit_t *); -extern void ldl_opened_trans(mt_unit_t *, int); -extern void ldl_open_trans(mt_unit_t *, int); -extern int ldl_logreset(mt_unit_t *, buf_t *); -extern void ldl_close_trans(mt_unit_t *); -extern size_t md_ldl_bufsize(ml_unit_t *); -extern void ldl_open_underlying(mt_unit_t *); -extern void ldl_snarf_done(); -extern int ldl_reset(mt_unit_t *, int, int); -extern void ldl_cleanup(ml_unit_t *); - -/* - * trans driver layer -- mdtrans.c - */ -extern kmem_cache_t *trans_child_cache; -extern void *md_trans_zalloc(size_t); -extern void *md_trans_zalloc_nosleep(size_t); -extern void *md_trans_alloc(size_t); -extern void *md_trans_alloc_nosleep(size_t); -extern void md_trans_free(void *, size_t); -extern int md_trans_not_wait(struct buf *cb); -extern int md_trans_not_done(struct buf *cb); -extern int md_trans_wait(struct buf *cb); -extern int trans_done(struct buf *cb); -extern int trans_done_shadow(struct buf *cb); -extern void trans_child_init(struct buf *bp); -extern void trans_close_all_devs(mt_unit_t *); -extern int trans_open_all_devs(mt_unit_t *); -extern int trans_build_incore(void *, int); -extern void trans_commit(mt_unit_t *, int); -extern int trans_detach(mt_unit_t *, int); -extern void trans_attach(mt_unit_t *, int); -extern int trans_reset(mt_unit_t *, minor_t, int, int); - -/* - * transaction ioctl -- trans_ioctl.c - */ - -/* rename named service functions */ -md_ren_list_svc_t trans_rename_listkids; -md_ren_svc_t trans_rename_check; -md_ren_roleswap_svc_t trans_renexch_update_kids; -md_ren_roleswap_svc_t trans_rename_update_self; -md_ren_roleswap_svc_t trans_exchange_parent_update_to; -md_ren_roleswap_svc_t trans_exchange_self_update_from_down; - -/* - * transaction op layer -- trans_top.c - */ -extern void _init_md_top(void); -extern void _fini_top(void); -extern void top_read(struct buf *, char *, mt_unit_t *, int, void *); -extern void md_top_read_roll(struct buf *, mt_unit_t *, ushort_t *); -extern void top_build_incore(mt_unit_t *); -extern void top_reset(mt_unit_t *, int, int); -extern void top_write(struct buf *, char *, mt_unit_t *, int, void *); - -/* - * map layer -- trans_delta.c - */ -extern void md_map_free_entries(mt_map_t *); -extern int md_matamap_overlap(mt_map_t *, offset_t, off_t); -extern int md_matamap_within(mt_map_t *, offset_t, off_t); -extern int md_deltamap_need_commit(mt_map_t *); -extern void md_deltamap_add(mt_map_t *, offset_t, off_t, delta_t, - int (*)(), uintptr_t); -extern mapentry_t *md_deltamap_remove(mt_map_t *, offset_t, off_t); -extern void md_deltamap_del(mt_map_t *, offset_t, off_t); -extern void md_deltamap_push(mt_unit_t *); -extern int md_logmap_need_commit(mt_map_t *); -extern int md_logmap_need_roll_async(mt_map_t *); -extern int md_logmap_need_roll_sync(mt_map_t *); -extern int md_logmap_need_roll(mt_map_t *); -extern void md_logmap_start_roll(mt_unit_t *); -extern void md_logmap_kill_roll(mt_map_t *); -extern void md_logmap_forceroll(mt_map_t *); -extern int md_logmap_overlap(mt_map_t *, md_dev64_t, offset_t, - off_t); -extern void md_logmap_remove_roll(mt_map_t *, md_dev64_t, offset_t, - off_t); -extern int md_logmap_next_roll(mt_map_t *, offset_t *, - md_dev64_t *); -extern void md_logmap_list_get(mt_map_t *, md_dev64_t, offset_t, - off_t, mapentry_t **); -extern void md_logmap_list_get_roll(mt_map_t *, md_dev64_t, - offset_t, off_t, mapentry_t **); -extern void md_logmap_list_put(mt_map_t *, mapentry_t *); -extern void md_logmap_read_mstr(ml_unit_t *, struct buf *, int, - void *); -extern void md_logmap_secmap_roll(mapentry_t *, offset_t, - ushort_t *); -extern int logmap_read_log(ml_unit_t *, char *, offset_t, off_t, - mapentry_t *); -extern void md_logmap_make_space(mt_map_t *, ml_unit_t *, - mapentry_t *); -extern void md_logmap_add(mt_unit_t *, md_dev64_t, char *, offset_t, - mapentry_t *); -extern void md_logmap_add_ud(mt_unit_t *, md_dev64_t, char *, - offset_t, mapentry_t *); -extern void md_logmap_commit(mt_unit_t *); -extern void md_logmap_sethead(mt_map_t *, ml_unit_t *, - struct buf *); -extern void md_logmap_roll_dev(mt_map_t *, ml_unit_t *ul, - md_dev64_t); -extern void md_logmap_roll_sud(mt_map_t *, ml_unit_t *ul, - md_dev64_t, offset_t, off_t); -extern int md_logmap_ud_done(struct buf *); -extern void md_logmap_ud_wait(); -extern void md_logmap_cancel(mt_unit_t *, md_dev64_t, offset_t, - off_t); -extern int md_logmap_iscancel(mt_map_t *, md_dev64_t, offset_t, - off_t); -extern void md_logmap_logscan(mt_unit_t *, daddr_t); -extern void map_build_incore(mt_unit_t *); -extern void map_reset(mt_unit_t *, int, int); -extern void _init_md_map(void); -extern void _fini_map(void); - -/* - * scan and roll threads -- trans_thread.c - */ -extern void md_trans_roll(ml_unit_t *); -extern void trans_scan(mt_unit_t *); -extern void trans_roll_prewrite(ml_unit_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MD_TRANS_H */ diff --git a/usr/src/uts/common/sys/lvm/mdio.h b/usr/src/uts/common/sys/lvm/mdio.h deleted file mode 100644 index e604a98795c0..000000000000 --- a/usr/src/uts/common/sys/lvm/mdio.h +++ /dev/null @@ -1,996 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS__MDIO_H -#define _SYS__MDIO_H - -#include -#include -#include -#include -#include -#ifdef _KERNEL -#include -#else /* !_KERNEL */ -#include -#endif -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * driver version number - */ -#define MD_DVERSION 0x00040003 /* major.minor */ -#define MD_SET_SHIFT (NBITSMINOR32 - MD_BITSSET) -#define MD_MAXUNITS (1 << MD_SET_SHIFT) -#define MD_UNIT_MASK (MD_MAXUNITS - 1) - -#define MD_MIN2UNIT(m) ((m) & MD_UNIT_MASK) -#define MD_MIN2SET(m) ((m) >> MD_SET_SHIFT) -#define MD_SID(u) ((u)->c.un_self_id) -#define MD_RECID(u) ((u)->c.un_record_id) -#define MD_STATUS(u) ((u)->c.un_status) -#define MD_PARENT(u) ((u)->c.un_parent) -#define MD_CAPAB(u) ((u)->c.un_capabilities) -#define MD_UN2SET(u) MD_MIN2SET(MD_SID(u)) -#define MD_UL2SET(l) MD_MIN2SET(MAXMIN32 & ((l)->un_dev)) - -#define MD_MKMIN(s, u) ((((s) & MD_SETMASK) << MD_SET_SHIFT) | \ - ((u) & MD_UNIT_MASK)) - -#define HSP_BITSID 31 -#define HSP_SET_SHIFT (HSP_BITSID - MD_BITSSET) -#define HSP_SET_MASK (MD_SETMASK << HSP_SET_SHIFT) -#define HSP_SET(hspid) (((hspid) & HSP_SET_MASK) >> HSP_SET_SHIFT) -#define HSP_ID(hspid) ((hspid) & ~HSP_SET_MASK) -#define MAKE_HSP_ID(setno, id) (((setno) << HSP_SET_SHIFT) | (id)) - -/* - * The following macros were added to support friendly names for hot spare - * pools. Before the addition of friendly names the hsp_self_id was merely - * the conbination of the set number and the hot spare pool number. With - * friendly names a NM record is created to hold the hot spare pool name. - * The hsp_self_id now becomes the set number shifted left plus the NM - * record key plus 1000. The number 1000 is used to collision between - * traditional hsp_self_ids and friendly name self ids. In traditional hot - * spare pool the hot spare pool number could never be grater than 999. - * - * HSP_ID_IS_FN(hspid) returns TRUE if the hot spare pool ID is the ID of - * a friendly named hsp. Will return FALSE otherwise. - * hspid may contain the set bits, since HSP_ID_IS_FN - * will call HSP_ID as part of doing its work. - * - * KEY_TO_HSP_ID(setno, reckey) constructs a hot spare pool ID (hsp_t) from - * a set number and a NM record key. The result is - * suitable for storing in the hsp_self_id member of a - * hot_spare_pool struct. - * - * HSP_ID_TO_KEY(hspid) returns the NM key that is encoded in the hot spare - * pool ID. MD_KEYBAD will be returned if hspid does - * not represent a friendly named hsp. hspid may - * contain the set bits, since HSP_ID_TO_KEY will call - * HSP_ID as part of doing its work. - * - * HSP_KEY_OK(reckey) Insures that the NM record key is not so large as - * to interfere with the set number bits in a hot - * spare pool self id. This macro will probably only - * be used in meta_hs_add. - */ -#define HSP_FN_BASE (1000) -#define HSP_ID_IS_FN(hspid) (HSP_ID(hspid) > HSP_FN_BASE) -#define KEY_TO_HSP_ID(setno, key) ((setno << HSP_SET_SHIFT) | \ - (key + HSP_FN_BASE)) -#define HSP_ID_TO_KEY(hspid) ((HSP_ID_IS_FN(hspid)) ? \ - (HSP_ID(hspid) - HSP_FN_BASE) : MD_KEYBAD) -#define HSP_KEY_OK(key) (((key + HSP_FN_BASE) & HSP_SET_MASK) == 0) - -/* - * for did stat ioctl - */ -#define MD_FIND_INVDID 0x01 -#define MD_GET_INVDID 0x02 - -/* - * for setting the un_revision, hsp_revision and hs_revision - */ -#define MD_64BIT_META_DEV 0x01 -#define MD_FN_META_DEV 0x02 /* Friendly named metadevice */ - -/* - * for trans EOF error messages - */ -#define MD_EOF_TRANS_MSG "Trans logging has been replaced by UFS" \ - " Logging.\nSee mount_ufs(1M). Operation failed.\n" - -#define MD_SHORT_EOF_TRANS_MSG "#Trans logging has been replaced by UFS" \ - " Logging.\n#See mount_ufs(1M). Operation failed.\n" - -#define MD_EOF_TRANS_WARNING "Existing Trans devices are not logging; they" \ - "\npass data directly to the underlying device.\n" - -#define MD_SHORT_EOF_TRANS_WARNING "#Existing Trans devices are not " \ - "logging; they\n#pass data directly to the underlying device.\n" - -/* - * for importing of disksets (IMP_LOAD) - */ -#define MD_IMP_STALE_SET 1 - -/* - * miscname stuff - */ - -#define MD_DRIVERNAMELEN 16 -#define MD_SETDRIVERNAME(to, from, setno) \ - if ((from) != NULL) \ - (void) strcpy((to)->md_driver.md_drivername, (from)); \ - (to)->md_driver.md_setno = (setno); - - -#define MD_GETDRIVERNAME(to, from) \ - (void) strcpy((to), (from)->md_driver.md_drivername); - -#define MD_PNTDRIVERNAME(from) \ - ((from)->md_driver.md_drivername) - -/* - * ioctl parameter structures - */ - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif -typedef struct md_i_driverinfo { - MD_DRIVER - md_error_t mde; - minor_t mnum; -} md_i_driverinfo_t; - -typedef struct md_i_getnext { - MD_DRIVER - md_error_t mde; - minor_or_hsp_t id; -} md_i_getnext_t; - -typedef struct md_i_getnum { - MD_DRIVER - md_error_t mde; - int start; - int size; - uint64_t minors; /* Pointer to minor #'s */ -} md_i_getnum_t; - -typedef struct md_i_get { - MD_DRIVER - md_error_t mde; - minor_or_hsp_t id; - int size; - uint64_t mdp; /* Contains pointer */ -} md_i_get_t; - -typedef struct md_i_reset { - MD_DRIVER - md_error_t mde; - minor_t mnum; /* Unit to clear */ - int force; -} md_i_reset_t; - -/* soft partition reset parameters */ -typedef struct md_sp_reset { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; /* Unit to clear */ - int force; /* Force reset */ - md_parent_t new_parent; /* New parent for child component */ -} md_sp_reset_t; - -/* soft partition status change parameters */ -typedef struct md_sp_statusset { - MD_DRIVER - md_error_t mde; /* Error return */ - int num_units; /* Number of units */ - int new_status; /* New status */ - int size; /* Array size */ - uint64_t minors; /* Pointer to array of minor numbers */ -} md_sp_statusset_t; - -typedef struct md_sp_update_wm { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; /* Unit to update */ - uint_t count; /* Number of watermarks */ - uint64_t wmp; /* Pointer to array of watermarks */ - uint64_t osp; /* Pointer to array of offsets */ -} md_sp_update_wm_t; - -typedef struct md_sp_read_wm { - MD_DRIVER - md_error_t mde; /* Error return */ - md_dev64_t rdev; /* Device from which to read */ - uint64_t wmp; /* Pointer to wm buffer */ - xsp_offset_t offset; /* Offset of wm */ -} md_sp_read_wm_t; - -typedef struct md_set_userflags { - MD_DRIVER - md_error_t mde; - minor_t mnum; - uint_t userflags; -} md_set_userflags_t; - -typedef struct md_stripe_params { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; - ms_params_t params; -} md_stripe_params_t; - -typedef struct md_raid_params { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; - mr_params_t params; -} md_raid_params_t; - -typedef struct md_mirror_params { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; - mm_params_t params; -} md_mirror_params_t; - -typedef struct md_grow_params { - MD_DRIVER - md_error_t mde; /* Error return */ - minor_t mnum; /* Unit to grow */ - int options; /* create a 64 or 32 bit device */ - uint64_t mdp; /* Optional - pointer to new unit struct */ - int size; /* Optional - size of new unit struct */ - int nrows; /* Optional - original number of rows */ - int npar; /* Optional - number of parents to lock */ - uint64_t par; /* Optional - pointer to parent units */ -} md_grow_params_t; - -/* if the didstat struct changes you will need to change the following macro */ -typedef struct md_i_didstat { - md_error_t mde; /* Error return */ - set_t setno; /* which set to use */ - side_t side; /* which side to use */ - int mode; /* find or get ? */ - int cnt; /* return number of invalid devid's found */ - int maxsz; /* return max size of invalid device id */ - uint64_t ctdp; /* pointer to structure to fill with ctds */ -} md_i_didstat_t; - -typedef struct mdnm_params { - md_error_t mde; /* Error return */ - char drvnm[MD_MAXDRVNM]; /* drvnm for get/set/rem nm */ - major_t major; /* major #, (alternative) for get nm */ - minor_t mnum; /* minor #, for get/set/rem nm */ - uint_t devname_len; /* Length of device name, for set nm */ - uint64_t devname; /* Address of device name for set/get */ - set_t setno; /* Which namespace set to use */ - side_t side; /* -1 == current side, >0 specified */ - mdkey_t key; /* 0 == alloc one, else use this key */ - mdkey_t retkey; /* return key here! */ - ushort_t devid_size; /* 0 == ret size, else use this one */ - uint64_t devid; /* pointer to devid, supplied by user */ - uint_t pathname_len; /* length of pathname */ - uint64_t pathname; /* address of pathname for update */ - md_dev64_t devt; /* devt for updating namespace */ - ushort_t minorname_len; /* length of minor name */ - uint64_t minorname; /* address of minor name */ - uint_t ref_count; /* returned n_count */ - int imp_flag; /* used by metaimport */ -} mdnm_params_t; - -typedef struct mdhspnm_params { - md_error_t mde; /* Error return */ - char drvnm[MD_MAXDRVNM]; /* drvnm for get/set/rem nm */ - uint_t hspname_len; /* Length of device name, for set nm */ - uint64_t hspname; /* Address of device name for set/get */ - set_t setno; /* Which namespace set to use */ - side_t side; /* -1 == current side, >0 specified */ - hsp_t hspid; /* 0 == alloc one, else use this key */ - hsp_t ret_hspid; /* return key here! */ - uint_t ref_count; /* returned n_count */ -} mdhspnm_params_t; - -typedef struct md_getdevs_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; - int cnt; - uint64_t devs; /* Pointer to devs */ -} md_getdevs_params_t; - - -typedef struct md_i_get_tstate { - minor_or_hsp_t id; - uint_t tstate; /* Transient state */ - md_error_t mde; -} md_i_get_tstate_t; - -typedef struct md_set_state_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; - uint_t sm; - uint_t comp; - uint_t state; - mddb_recid_t hs_id; -} md_set_state_params_t; - -typedef struct md_alloc_hotsp_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; - uint_t sm; - uint_t comp; - mddb_recid_t hs_id; -} md_alloc_hotsp_params_t; - -typedef struct md_suspend_wr_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; -} md_suspend_wr_params_t; - -typedef struct md_mn_req_owner { - minor_t mnum; /* Mirror metadevice */ - uint_t flags; /* Flags (see below) */ - md_mn_nodeid_t owner; /* New owner of Mirror */ -} md_mn_req_owner_t; - -#define MD_MN_MM_PREVENT_CHANGE 0x0001 /* Disallow further ownership change */ -#define MD_MN_MM_ALLOW_CHANGE 0x0002 /* Allow ownership change */ -#define MD_MN_MM_SPAWN_THREAD 0x0004 -#define MD_MN_MM_CHOOSE_OWNER 0x0008 /* Choose a resync owner */ - -#define MD_MN_MM_RESULT 0x80000000 /* Result contained in LSB */ -#define MD_MN_MM_RESULT_MASK 0xFFFF /* Mask for result code */ -#define MD_MN_MM_RES_OK 0 /* Success */ -#define MD_MN_MM_RES_FAIL 1 /* Failure */ - -typedef struct md_set_mmown_params { - MD_DRIVER - md_error_t mde; - md_mn_req_owner_t d; /* New owner */ -} md_set_mmown_params_t; - -typedef struct md_mn_own_status { - MD_DRIVER - md_error_t mde; - minor_t mnum; - uint_t flags; /* See above *_MM_RESULT flags */ -} md_mn_own_status_t; - -typedef struct md_mn_poke_hotspares { - MD_DRIVER - md_error_t mde; -} md_mn_poke_hotspares_t; - -typedef struct md_mn_rs_params { - MD_DRIVER - md_error_t mde; - int msg_type; /* Type of message */ - minor_t mnum; /* Mirror metadevice */ - uint_t rs_type; /* Type of resync */ - diskaddr_t rs_start; /* 1st block of resync range */ - diskaddr_t rs_size; /* size of resync range */ - diskaddr_t rs_done; /* amount of resync done so far */ - diskaddr_t rs_2_do; /* amount still to be done */ - md_mn_nodeid_t rs_originator; /* Originator of resync message */ - char rs_flags; /* flags */ - char rs_first_time; /* set if first resync-next message */ - sm_state_t rs_sm_state[NMIRROR]; /* Submirror state */ - sm_flags_t rs_sm_flags[NMIRROR]; /* Submirror flags */ -} md_mn_rs_params_t; - -/* flag values for rs_flags */ -#define MD_MN_RS_ERR 0x01 /* Resync err */ -#define MD_MN_RS_CLEAR_OPT_NOT_DONE 0x02 /* Optimized resync done */ -#define MD_MN_RS_FIRST_RESYNC_NEXT 0x04 /* First RESYNC_NEXT message */ - -typedef struct md_mn_setcap_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; - uint_t sc_set; /* Capability settings */ -} md_mn_setcap_params_t; - -typedef struct md_mkdev_params { - MD_DRIVER - md_error_t mde; /* Error return */ - unit_t un; -} md_mkdev_params_t; - -#define MDMN_RR_CLEAN_PARAMS_DATA(x) ((unsigned char *)(x) + \ - sizeof (md_mn_rr_clean_params_t)) -#define MDMN_RR_CLEAN_PARAMS_SIZE(x) (sizeof (md_mn_rr_clean_params_t) + \ - MDMN_RR_CLEAN_PARAMS_DATA_BYTES(x)) -#define MDMN_RR_CLEAN_PARAMS_START_BIT(x) ((x)->rr_start_size >> 16) -#define MDMN_RR_CLEAN_PARAMS_DATA_BYTES(x) ((x)->rr_start_size & 0xffff) - -typedef struct md_mn_rr_clean_params { - MD_DRIVER - md_error_t mde; - md_mn_nodeid_t rr_nodeid; - minor_t rr_mnum; - unsigned int rr_start_size; /* start_bit (16b) | data_bytes (16b) */ - /* actual data goes here */ -} md_mn_rr_clean_params_t; - -typedef struct md_mn_rr_dirty_params { - MD_DRIVER - md_error_t mde; - minor_t rr_mnum; - md_mn_nodeid_t rr_nodeid; - ushort_t rr_start; /* First RR region to mark */ - ushort_t rr_end; /* Last RR region to mark */ -} md_mn_rr_dirty_params_t; - -/* - * Flags to coordinate sending device id between kernel and user space. - * To get devid from kernel: - * User calls ioctl with l_devid_flags set to GETSZ flag to get size of - * devid which is returned in the l_devid_sz field if the SZ flag is set. - * Then user allocs that size and sends same ioctl with SPACE flag set - * and l_devid_sz set to alloc'd size. Kernel either sets the NOSPACE - * flag (if alloc'd space is not big enough) or sets the VALID flag and - * fills in the devid. - * - * To send devid to kernel: - * User alloc's space for devid, fills in devid, sets (SPACE|VALID|SZ) flags - * and sets size of devid into l_devid_sz field. - * - * If MDDB_DEVID_SPACE is set, MDDB_DEVID_GETSZ is ignored. - * If no flags are set, devid information is ignored. - */ -#define MDDB_DEVID_SPACE 0x0001 /* l_devid_sz bytes of space alloc'd */ -#define MDDB_DEVID_VALID 0x0002 /* kernel has filled in devid */ -#define MDDB_DEVID_NOSPACE 0x0004 /* not enough alloc'd space for devid */ -#define MDDB_DEVID_GETSZ 0x0008 /* fill in l_devid_sz with devid size */ -#define MDDB_DEVID_SZ 0x0010 /* l_devid_sz filled in with devid sz */ - - - -/* - * Maximum number of replicas (or number of locator blocks) in set. - */ -#define MDDB_NLB 50 - -/* - * maximum size of allowable bootlist property string - only used to - * read in and write out boolist property strings to conf files. - */ -#define MDDB_BOOTLIST_MAX_LEN MAX_HWC_LINESIZE - -/* - * Percentage of free space left in replica during conversion of non-devid - * style replica to devid style replica. - */ -#define MDDB_DEVID_CONV_PERC 5 - -typedef struct mddb_cfg_loc { - dev32_t l_dev; - daddr32_t l_blkno; - int l_flags; - char l_driver[MD_MAXDRVNM]; - minor_t l_mnum; - int l_devid_flags; - uint64_t l_devid; /* pointer to devid */ - int l_devid_sz; - uint64_t l_old_devid; - int l_old_devid_sz; - char l_minor_name[MDDB_MINOR_NAME_MAX]; - char l_devname[MAXPATHLEN]; /* device name */ -} mddb_cfg_loc_t; - -typedef struct mddb_dtag { - md_timeval32_t dt_tv; - int dt_id; - set_t dt_setno; - char dt_sn[MDDB_SN_LEN]; - char dt_hn[MD_MAX_NODENAME_PLUS_1]; -} mddb_dtag_t; - -typedef struct mddb_dtag_lst { - struct mddb_dtag_lst *dtl_nx; - mddb_dtag_t dtl_dt; -} mddb_dtag_lst_t; - -typedef struct mddb_dtag_get_parm { - set_t dtgp_setno; - mddb_dtag_t dtgp_dt; - md_error_t dtgp_mde; -} mddb_dtag_get_parm_t; - -typedef struct mddb_dtag_use_parm { - int dtup_id; - set_t dtup_setno; - md_error_t dtup_mde; -} mddb_dtag_use_parm_t; - -typedef struct mddb_accept_parm { - set_t accp_setno; - md_error_t accp_mde; -} mddb_accept_parm_t; - -typedef struct mddb_med_parm { - set_t med_setno; - md_hi_arr_t med; - md_error_t med_mde; /* error return */ -} mddb_med_parm_t; - -typedef struct mddb_med_upd_parm { - set_t med_setno; - md_error_t med_mde; /* error return */ -} mddb_med_upd_parm_t; - -#define MED_TE_NM_LEN 64 - -typedef struct mddb_med_t_ent { - char med_te_nm[MED_TE_NM_LEN]; - md_dev64_t med_te_dev; /* fixed size dev_t */ -} mddb_med_t_ent_t; - -typedef struct mddb_med_t_parm { - md_error_t med_tp_mde; /* error return */ - int med_tp_nents; /* number of entries */ - int med_tp_setup; /* setup flag */ - mddb_med_t_ent_t med_tp_ents[1]; /* Var. sized array */ -} mddb_med_t_parm_t; - -#define MDDB_SETMASTER_MAGIC 0x53544d41 /* Ascii for STMA */ -typedef struct mddb_setmaster_config { - md_error_t c_mde; - set_t c_setno; - int c_magic; /* used to verify ioctl */ - int c_current_host_master; -} mddb_setmaster_config_t; - -/* - * Structure used to set/reset/get flags in set structure. - */ -#define MDDB_SETFLAGS_MAGIC 0x5354464c /* ascii for STFL */ -typedef struct mddb_setflags_config { - md_error_t sf_mde; - set_t sf_setno; - int sf_magic; /* used to verify ioctl */ - int sf_flags; /* Control flags set/reset/get */ - int sf_setflags; /* Flag values */ -} mddb_setflags_config_t; - -typedef struct mddb_set_node_params { - md_error_t sn_mde; - set_t sn_setno; - md_mn_nodeid_t sn_nodeid; -} mddb_set_node_params_t; - -typedef struct mddb_block_parm { - md_error_t c_mde; - set_t c_setno; - int c_blk_flags; -} mddb_block_parm_t; - -typedef struct mddb_parse_parm { - md_error_t c_mde; - set_t c_setno; - int c_parse_flags; - int c_lb_flags[MDDB_NLB]; -} mddb_parse_parm_t; - -typedef struct mddb_optrec_parm { - md_error_t c_mde; - set_t c_setno; - md_replica_recerr_t c_recerr[2]; -} mddb_optrec_parm_t; - -typedef struct mddb_config { - md_error_t c_mde; /* error return */ - int c_id; /* used with getnext locator */ - md_splitname c_devname; /* contains name or keys */ - int c_dbcnt; /* number of dbs */ - int c_dbmax; /* maximum number of dbs */ - int c_flags; - int c_dbend; /* size of database */ - set_t c_setno; /* set number of replica */ - int c_multi_node; /* set if multi_node set */ - side_t c_sideno; /* side number of replica */ - md_timeval32_t c_timestamp; /* creation of set */ - /* setname */ - char c_setname[MD_MAX_SETNAME_PLUS_1]; - md_hi_arr_t c_med; /* Mediator host information */ - int c_spare[14]; /* unused must be zero */ - md_dev64_t c_devt; /* devt to get/set */ - mddb_cfg_loc_t c_locator; /* device specific info */ -} mddb_config_t; - -#define c_subcmd c_spare[0] -/* - * Subcommands. - */ -#define MDDB_CONFIG_ABS 1 /* treat c_id as abs index */ - -typedef struct mddb_optloc { - int recid; /* really mddb_recid_t */ - int li[2]; -} mddb_optloc_t; - -typedef struct md_gs_stat_parm { - set_t gs_setno; - uint_t gs_status; - md_error_t gs_mde; -} md_gs_stat_parm_t; - -typedef struct { - int setno; - int owns_set; -} mddb_ownset_t; - -typedef enum md_rename_operation_t { - MDRNOP_UNK = 0, MDRNOP_RENAME, MDRNOP_EXCHANGE -} md_renop_t; - -typedef struct md_rename { - md_error_t mde; - md_renop_t op; - int revision; - uint_t flags; - struct { - minor_t mnum; - key_t key; - } from, to; -} md_rename_t; - -typedef struct md_regen_param { - MD_DRIVER - md_error_t mde; - minor_t mnum; /* Unit to regenerate parity for */ -} md_regen_param_t; - -/* Base ioctl's defined here */ -#define MDIOC ('V' << 8) -#define ISMDIOC(c) (((c) >> 8) == 'V') - -#define MD_IOCSET (MDIOC|0) /* set config (metainit) */ -#define MD_IOCRESET (MDIOC|1) /* reset config (metaclear) */ -#define MD_IOCGET (MDIOC|2) /* get config (metastat) */ -#define MD_IOCGROW (MDIOC|3) /* grow config (dyn concat) */ -#define MD_IOCCHANGE (MDIOC|4) /* change config (metaparam) */ -#define MD_IOCSET_NM (MDIOC|5) /* set device name */ -#define MD_IOCGET_NM (MDIOC|6) /* get device name */ -#define MD_IOCREM_NM (MDIOC|7) /* remove device name */ -#define MD_IOCGET_DRVNM (MDIOC|8) /* get driver name */ -#define MD_IOCGET_NEXT (MDIOC|9) /* get next unit id */ -#define MD_IOCGET_DEVS (MDIOC|10) /* get device list */ -#define MD_DB_NEWDEV (MDIOC|11) /* add a db replica */ -#define MD_DB_USEDEV (MDIOC|12) /* patch in a db location */ -#define MD_DB_GETDEV (MDIOC|13) /* get a db replica */ -#define MD_DB_DELDEV (MDIOC|14) /* remove a db replica */ -#define MD_DB_ENDDEV (MDIOC|15) /* get db replica and size */ -#define MD_DB_GETDRVNM (MDIOC|16) /* get db replica driver name */ -#define MD_HALT (MDIOC|17) /* halt driver (metahalt) */ -#define MD_GRAB_SET (MDIOC|18) -#define MD_RELEASE_SET (MDIOC|20) /* release a set */ -#define MD_IOCSETSYNC (MDIOC|21) -#define MD_IOCGETSYNC (MDIOC|22) -#define MD_IOCOFFLINE (MDIOC|23) -#define MD_IOCONLINE (MDIOC|24) -#define MD_IOCATTACH (MDIOC|25) -#define MD_IOCDETACH (MDIOC|26) -#define MD_IOCREPLACE (MDIOC|27) -#define MD_DB_USERREQ (MDIOC|28) -#define MD_DB_GETOPTLOC (MDIOC|29) /* get locators for opt resync rec. */ -#define MD_DB_OWNSET (MDIOC|30) /* Does caller own the set */ -#define MD_IOCGETNSET (MDIOC|31) /* Get the config'd number sets */ -#define MD_IOCNXTKEY_NM (MDIOC|32) /* get next key from namespace */ -#define MD_DB_NEWSIDE (MDIOC|33) /* add another side to the db replica */ -#define MD_DB_DELSIDE (MDIOC|34) /* delete a side from the db replica */ -#define MD_IOCGVERSION (MDIOC|35) /* get the driver version */ -#define MD_IOCSET_FLAGS (MDIOC|36) /* set the userflags of a metadevice */ -#define MD_IOCGETNUNITS (MDIOC|37) /* Get the config'd number units */ -#define MD_IOCNOTIFY (MDIOC|38) /* notification */ -#define MD_IOCRENAME (MDIOC|39) /* (Ex)Change/Rename unit identities */ -#define MD_IOCISOPEN (MDIOC|40) /* Is metadevice open? */ -#define MD_IOCSETREGEN (MDIOC|41) /* regen ioctl for raid */ -#define MD_MED_GET_LST (MDIOC|42) /* Get the mediator list */ -#define MD_MED_SET_LST (MDIOC|43) /* Set the mediator list */ -#define MD_MED_UPD_MED (MDIOC|44) /* Have the kernel push mediator data */ -#define MD_MED_GET_NMED (MDIOC|45) /* Get the max number of mediators */ -#define MD_MED_GET_TLEN (MDIOC|46) /* Get the mediator transport tbl len */ -#define MD_MED_GET_T (MDIOC|47) /* Get the mediator transport tbl */ -#define MD_MED_SET_T (MDIOC|48) /* Set the mediator transport tbl */ -#define MD_MED_GET_TAG (MDIOC|49) /* Get the list of data tags */ -#define MD_MED_USE_TAG (MDIOC|50) /* Use one of the data tags */ -#define MD_MED_ACCEPT (MDIOC|51) /* Accept 1/2 n 1/2 */ -#define MD_GET_SETSTAT (MDIOC|52) /* Get the s_status for a set */ -#define MD_SET_SETSTAT (MDIOC|53) /* Set the s_status for a set */ -#define MD_IOCPROBE_DEV (MDIOC|54) /* Force pseudo opens for metadevices */ -#define MD_IOCGET_DID (MDIOC|55) /* Get device id */ -#define MD_IOCUPD_NM (MDIOC|56) /* Update namespace */ -#define MD_DB_SETDID (MDIOC|57) /* Set device id for a locator block */ -#define MD_IOCUPD_LOCNM (MDIOC|58) /* update locator namespace */ -#define MD_SETNMDID (MDIOC|59) /* update namespace devid */ -#define MD_IOCDID_STAT (MDIOC|60) /* get invalid device id's */ -#define MD_UPGRADE_STAT (MDIOC|61) /* get upgrade status information */ -#define MD_IOCGET_NUM (MDIOC|62) /* get number of devs and devs */ -#define MD_IOCGET_TSTATE (MDIOC|63) /* get ui_tstate for metastat */ -#define MD_SETMASTER (MDIOC|64) -#define MD_MN_SET_DOORH (MDIOC|65) /* MN: set the doorhandle */ -#define MD_MN_OPEN_TEST (MDIOC|66) /* MN: check / (un)lock a md */ -#define MD_MN_SET_MM_OWNER (MDIOC|67) /* Set mirror owner */ -#define MD_MN_SET_NODEID (MDIOC|68) /* Set this node's id */ -#define MD_MN_SET_STATE (MDIOC|69) /* Set mirror state */ -#define MD_MN_SUSPEND_WRITES (MDIOC|70) /* Blocks writes */ -#define MD_MN_GET_MM_OWNER (MDIOC|71) /* Get mirror owner */ -#define MD_IOCGUNIQMSGID (MDIOC|72) /* create a unique message ID */ -#define MD_MN_MM_OWNER_STATUS (MDIOC|73) /* Return status of SET_MM_OWNER */ -#define MD_MN_ALLOCATE_HOTSPARE (MDIOC|74) /* Allocate hotspare */ -#define MD_MN_SUBMIRROR_STATE (MDIOC|75) /* Submirror state change */ -#define MD_MN_RESYNC (MDIOC|76) /* Resync ioctl */ -#define MD_MN_SUSPEND_SET (MDIOC|77) /* suspend IO's for a MN diskset */ -#define MD_MN_RESUME_SET (MDIOC|78) /* resume IO's for a MN diskset */ -#define MD_MN_MDDB_PARSE (MDIOC|79) /* Re-parse portion of MNset mddb */ -#define MD_MN_MDDB_BLOCK (MDIOC|80) /* Block parse or record changes */ -#define MD_MN_MDDB_OPTRECFIX (MDIOC|81) /* Fix optimized record failure */ -#define MD_MN_SET_CAP (MDIOC|82) /* set capability, eg ABR, DMR */ -#define MD_MN_CHK_WRT_MDDB (MDIOC|83) /* New master checks/writes mddb */ -#define MD_MN_SET_SETFLAGS (MDIOC|84) /* Set/reset set flags */ -#define MD_MN_GET_SETFLAGS (MDIOC|85) /* Gets set flags */ -#define MD_IOCGET_DIDMIN (MDIOC|94) /* get the minor name for a devid */ -#define MD_IOCIMP_LOAD (MDIOC|95) /* load the import replicas */ -#define MD_IOCSET_DID (MDIOC|96) /* set the devid of a disk */ -#define MD_MN_GET_MIRROR_STATE (MDIOC|97) /* Get the mirror state MN only */ -#define MD_MN_DB_USERREQ (MDIOC|98) /* MN MT-version of USERREQ */ -#define MD_IOCMAKE_DEV (MDIOC|99) /* create device node for unit */ -#define MD_MN_SET_COMMD_RUNNING (MDIOC|100) /* Commd running or exiting */ -#define MD_MN_COMMD_ERR (MDIOC|101) /* get a message out */ -#define MD_MN_SETSYNC (MDIOC|102) /* multi-threaded MD_IOCSETSYNC */ -#define MD_MN_POKE_HOTSPARES (MDIOC|103) /* poke hotspares */ -#define MD_DB_LBINITTIME (MDIOC|104) /* get the lb_inittime */ -#define MD_IOCGET_HSP_NM (MDIOC|105) /* get hsp entry from namespace */ -#define MD_IOCREM_DEV (MDIOC|106) /* remove device node for unit */ -#define MD_IOCUPDATE_NM_RR_DID (MDIOC|107) /* update remotely repl did in NM */ -#define MD_MN_RR_DIRTY (MDIOC|108) /* Mark RR range as dirty */ -#define MD_MN_RR_CLEAN (MDIOC|109) /* Clean RR bits from bitmap */ - -#define MDIOC_MISC (MDIOC|128) /* misc module base */ -/* Used in DEBUG_TEST code */ -#define MD_MN_CHECK_DOOR1 (MDIOC|126) /* MN: test door to master */ -#define MD_MN_CHECK_DOOR2 (MDIOC|127) /* MN: test door master-broadcast */ - -#define NODBNEEDED(c) ((c) == MD_IOCNOTIFY) - -typedef struct md_resync_ioctl { - MD_DRIVER - md_error_t mde; - minor_t ri_mnum; /* mirror to sync */ - diskaddr_t ri_copysize; /* The size of the copy buffer */ - int ri_zerofill; /* Zerofill on lec read error */ - int ri_percent_done; /* percent done current phase */ - int ri_percent_dirty; - md_riflags_t ri_flags; -} md_resync_ioctl_t; - -typedef struct md_rrsize { - MD_DRIVER - md_error_t mde; /* error return */ - minor_t mnum; /* unit # to get */ - ulong_t rr_num; /* Number of resync regions */ - ulong_t rr_blksize; /* Blocksize of regions */ -} md_rrsize_t; - -typedef enum replace_cmd { - REPLACE_COMP, ENABLE_COMP, FORCE_REPLACE_COMP, FORCE_ENABLE_COMP -} replace_cmd_t; - -typedef struct replace_params { - MD_DRIVER - md_error_t mde; - replace_cmd_t cmd; /* what to do */ - minor_t mnum; /* mirror to act upon */ - md_dev64_t old_dev; /* enable/replace use this */ - md_dev64_t new_dev; /* replace only uses this */ - mdkey_t new_key; /* replace only uses this */ - diskaddr_t start_blk; /* start block of new device */ - int has_label; /* has label flag of new device */ - diskaddr_t number_blks; /* # of blocks of new device */ - uint_t options; /* misc options, see MDIOCTL_* below */ -} replace_params_t; - -typedef struct md_i_off_on { - MD_DRIVER - md_error_t mde; - minor_t mnum; - md_dev64_t submirror; - int force_offline; -} md_i_off_on_t; - -typedef struct md_att_struct { - MD_DRIVER - md_error_t mde; /* Normal error */ - minor_t mnum; - mdkey_t key; /* namespace key of sm */ - md_dev64_t submirror; /* The device to attach */ - uint_t options; /* passed in from the command */ -} md_att_struct_t; - -/* possible values for options above */ -#define MDIOCTL_DRYRUN 0x0001 /* Only check if operation possible */ -#define MDIOCTL_NO_RESYNC_RAID 0x0002 /* if cluster replace we don't */ - /* want to resync */ - -typedef struct md_detach_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; /* mirror to act upon */ - md_dev64_t submirror; - int force_detach; -} md_detach_params_t; - -/* - * Structure for accessing the DB from user land. - */ -typedef struct mddb_userreq { - md_error_t ur_mde; - mddb_usercmd_t ur_cmd; - set_t ur_setno; - mddb_type_t ur_type; - uint_t ur_type2; - mddb_recid_t ur_recid; - mddb_recstatus_t ur_recstat; - int ur_size; - uint64_t ur_data; /* Pointer to user data */ -} mddb_userreq_t; - -/* - * Ioctl structure for MD_IOCISOPEN - */ -typedef struct md_isopen { - md_error_t mde; - md_dev64_t dev; - int isopen; -} md_isopen_t; - -/* - * Ioctl structure for MD_MN_OPEN_TEST - * md_clu_open stands for md check/lock/unlock - * Can't use MD_IOCISOPEN, because it's a contracted inteface. - */ -typedef struct md_clu_open { - md_error_t clu_mde; - md_dev64_t clu_dev; - enum { MD_MN_LCU_CHECK = 0, - MD_MN_LCU_LOCK, - MD_MN_LCU_UNLOCK } clu_cmd; - int clu_isopen; -} md_clu_open_t; - -/* - * Structure to push the message out from commd - * MAXPATHLEN macro is being overloaded to represent - * the line size of 1024 characters. i.e. no path - * is being passed. - */ -typedef struct md_mn_commd_err { - int size; - uint64_t md_message; /* pointer to array of chars */ -} md_mn_commd_err_t; - -/* - * Ioctl structure for MD_IOCPROBE_DEV - */ - -#define TESTNAME_LEN 32 - -#define PROBE_SEMA(p) p->probe_sema -#define PROBE_MX(p) p->probe_mx - -/* - * To categorize user/kernel structures md_probedev is split into two, - * one used by user and the other by kernel, thereby hiding the semaphore - * /mutex pointer members from user, which should be the appropriate one. - */ - -typedef struct md_probedev { - MD_DRIVER - md_error_t mde; /* return error status */ - int nmdevs; /* number of metadevices */ - char test_name[TESTNAME_LEN]; - uint64_t mnum_list; /* pointer to array of minor numbers */ -} md_probedev_t; - -typedef struct md_probedev_impl { - ksema_t *probe_sema; - kmutex_t *probe_mx; - md_probedev_t probe; -} md_probedev_impl_t; - -/* - * Ioctl structure for MD_MN_GET_MIRROR_STATE - */ -typedef struct md_mn_get_mir_state { - MD_DRIVER - minor_t mnum; /* Unit to obtain submirror info from */ -} md_mn_get_mir_state_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif -/* - * Per set flags, stored in md_set[n].s_status - */ -#define MD_SET_HALTED 0x00000001 /* Set is shut down */ -#define MD_SET_SNARFED 0x00000002 /* incores built for set db recs */ -#define MD_SET_SNARFING 0x00000004 /* incores being built for set */ -#define MD_SET_STALE 0x00000008 /* set database not correct */ -#define MD_SET_NM_LOADED 0x00000010 /* set namespace is loaded */ -#define MD_SET_TAGDATA 0x00000020 /* tagged data detected */ -#define MD_SET_ACCOK 0x00000040 /* Accept data is possible */ -#define MD_SET_TOOFEW 0x00000080 /* not enough replicas */ -#define MD_SET_USETAG 0x00000100 /* A tag is selected, use it */ -#define MD_SET_ACCEPT 0x00000200 /* User chose accept 50/50 mode */ -#define MD_SET_OWNERSHIP 0x00000400 /* Set is owned */ -#define MD_SET_BADTAG 0x00000800 /* DT is not valid */ -#define MD_SET_CLRTAG 0x00001000 /* Clear the tags */ -#define MD_SET_KEEPTAG 0x00002000 /* Keep the tag */ -#define MD_SET_PUSHLB 0x00004000 /* Indicate a LB push is needed */ -#define MD_SET_MNSET 0x00008000 /* Set is a multinode diskset */ -#define MD_SET_DIDCLUP 0x00010000 /* Set has cleaned up devids */ -#define MD_SET_MNPARSE_BLK 0x00020000 /* Do not send parse msgs */ -#define MD_SET_MN_NEWMAS_RC 0x00040000 /* Is new master during reconfig */ -#define MD_SET_MN_START_RC 0x00080000 /* Start step executed for set */ -#define MD_SET_IMPORT 0x00100000 /* Indicate set is importing */ -#define MD_SET_MN_MIR_STATE_RC 0x00200000 /* Mirror state gotten for set */ -#define MD_SET_HOLD 0x00400000 /* Hold set during release */ -#define MD_SET_REPLICATED_IMPORT 0x00800000 /* Set importing RC disk */ - -#define MD_MNSET_SETNO(setno) (md_set[setno].s_status & MD_SET_MNSET) - -/* - * See meta_prbits() in SUNWmd/lib/libmeta/meta_print.c for a description of - * the way this is used - */ -#define MD_SET_STAT_BITS "\020\001HALTED\002SNARFED\003SNARFING\004STALE" \ - "\005NM_LOADED\006TAGDATA\007ACCOK\010TOOFEW" \ - "\011USETAG\012ACCEPT\013OWNERSHIP\014BADTAG" \ - "\015CLRTAG\016KEEPTAG\017PUSHLB\020MNSET" \ - "\021DIDCLUP\022MNPARSE_BLK\023MN_NEWMAS_RC" \ - "\024MN_START_RC\025IMPORT\026MIR_STATE_RC" \ - "\027HOLD\030REPLICATED_IMPORT" - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS__MDIO_H */ diff --git a/usr/src/uts/common/sys/lvm/mdiox.x b/usr/src/uts/common/sys/lvm/mdiox.x deleted file mode 100644 index 5e5c890e5318..000000000000 --- a/usr/src/uts/common/sys/lvm/mdiox.x +++ /dev/null @@ -1,2101 +0,0 @@ -%/* -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License (the "License"). -% * You may not use this file except in compliance with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%/* -% * Copyright 2007 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% */ -% -%#pragma ident "%Z%%M% %I% %E% SMI" -% -%/* -% * MDD interface definitions -% */ - -%/* pick up multihost ioctl definitions */ -%#include -%/* get the basic XDR types */ -%#include -%/* pick up device id information */ -%#include - -%#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -%/* -% * NOTE: can't change these structures so make sure they are packed -% * in the kernel. -% */ -%#pragma pack(4) -%#endif -% -%/* -% * fundamental types -% */ -% -%/* -% * -% * NOTE: THESE ARE ON-DISK VALUES DO NOT CHANGE THE ORDER -% */ -enum mddb_type_t { - MDDB_ALL, - MDDB_NM_HDR, - MDDB_NM, - MDDB_SHR_NM, - MDDB_VTOC, - MDDB_USER, - MDDB_DID_NM_HDR, - MDDB_DID_NM, - MDDB_DID_SHR_NM, - MDDB_EFILABEL, - MDDB_FIRST_MODID = 1000 -}; - -% -%/* -% * Configuration commands. -% */ -enum mddb_cfgcmd_t { - MDDB_USEDEV, - MDDB_NEWDEV, - MDDB_DELDEV, - MDDB_GETDEV, - MDDB_ENDDEV, - MDDB_GETDRVRNAME, - MDDB_RELEASESET, - MDDB_NEWSIDE, - MDDB_DELSIDE, - MDDB_SETDID, - MDDB_LBINITTIME -}; - -% -%/* -% * Return codes from DB record operations. -% */ -enum mddb_recstatus_t { - MDDB_NORECORD, - MDDB_NODATA, - MDDB_OK, - MDDB_STALE -}; - -% -%/* -% * Commands for DB accesses from user land. -% */ -enum mddb_usercmd_t { - MD_DB_GETNEXTREC, - MD_DB_COMMIT_ONE, - MD_DB_COMMIT_MANY, - MD_DB_GETDATA, - MD_DB_DELETE, - MD_DB_CREATE, - MD_DB_GETSTATUS, - MD_DB_GETSIZE, - MD_DB_SETDATA, - MD_DB_MAKEID -}; - -% -%/* -% * MDDB_USER record subtypes, set records and drive records. -% * Node records (NR) used for Multinode Disksets. -% * The MDDB_UR_SR record subtype is associated with the structures -% * md_set_record and md_mnset_record. -% * The MDDB_UR_DR record subtype is associated with the structure -% * md_drive_record. -% * The MDDB_NR_DR record subtype is associated with the structure -% * md_mnnode_record. -% * The MDDB_UR_LR record subtype is associated with the structure -% * md_mn_changelog_record_t -% */ -enum mddb_userrec_t { - MDDB_UR_ALL, - MDDB_UR_SR, - MDDB_UR_DR, - MDDB_UR_NR, - MDDB_UR_LR -}; - -% -%/* -% * MDDB_USER record get commands. -% */ -enum md_ur_get_cmd_t { - MD_UR_GET_NEXT, - MD_UR_GET_WKEY -}; - -% -%/* -% * These are the options for mddb_createrec() -% */ -enum md_create_rec_option_t { - MD_CRO_NOOPT = 0x000, - MD_CRO_OPTIMIZE = 0x001, - MD_CRO_32BIT = 0x002, - MD_CRO_64BIT = 0x004, - MD_CRO_STRIPE = 0x008, - MD_CRO_MIRROR = 0x010, - MD_CRO_RAID = 0x020, - MD_CRO_SOFTPART = 0x040, - MD_CRO_TRANS_MASTER = 0x080, - MD_CRO_TRANS_LOG = 0x100, - MD_CRO_HOTSPARE = 0x200, - MD_CRO_HOTSPARE_POOL = 0x400, - MD_CRO_CHANGELOG = 0x800, - MD_CRO_FN = 0x1000 -}; - -% -%/* -% * This SKEW value is used to skew the sideno of -% * the share device names that are put into each -% * local set's namespace. This will prevent the -% * wrong name to be picked up via a devno, when -% * we really wanted a local device name. -% */ -const SKEW = 1; - -#ifdef RPC_XDR -% -%/* Start - Avoid duplicate definitions, but get the xdr calls right */ -%#if 0 -#include "meta_arr.x" -%#endif /* 0 */ -%/* End - Avoid duplicate definitions, but get the xdr calls right */ -% -#endif /* RPC_XDR */ - -const MD_DRIVE_RECORD_REVISION = 0x00010000; - -#ifdef RPC_HDR -% -%#define MD_DR_ADD 0x00000001U -%#define MD_DR_DEL 0x00000002U -%#define MD_DR_FIX_MB_DID 0x10000000U /* Fix MB */ -%#define MD_DR_FIX_LB_NM_DID 0x20000000U /* Fix LB and namespaces */ -%#define MD_DR_UNRSLV_REPLICATED 0x40000000U -%#define MD_DR_OK 0x80000000U -#endif /* RPC_HDR */ - -#if !defined(_KERNEL) -struct md_drive_record { - u_int dr_revision; /* revision level */ - u_int dr_flags; /* state flags */ - mddb_recid_t dr_selfid; /* db record id */ - md_timeval32_t dr_ctime; /* creation timestamp */ - u_long dr_genid; /* generation id */ - md_drive_record *dr_next; /* next ptr (Incore) */ - mddb_recid_t dr_nextrec; /* next record id */ - int dr_dbcnt; /* # of replica's */ - int dr_dbsize; /* replica size */ - mdkey_t dr_key; /* namespace key */ -}; -#else /* _KERNEL */ -struct md_drive_record { - u_int dr_revision; /* revision level */ - u_int dr_flags; /* state flags */ - mddb_recid_t dr_selfid; /* db record id */ - md_timeval32_t dr_ctime; /* creation timestamp */ - u_int dr_genid; /* generation id */ - u_int dr_next; /* next ptr (Incore) */ - mddb_recid_t dr_nextrec; /* next record id */ - int dr_dbcnt; /* # of replica's */ - int dr_dbsize; /* replica size */ - mdkey_t dr_key; /* namespace key */ -}; -#endif /* !_KERNEL */ - -#ifdef RPC_HDR -%/* -% * Actions that can be taken on a node record. -% * Used with routine upd_nr_flags. -% */ -% -%#define MD_NR_JOIN 0x00000001U /* Turn on JOIN flag */ -%#define MD_NR_WITHDRAW 0x00000002U /* Turn off JOIN flag */ -%#define MD_NR_SET 0x00000004U /* Set node flags in nodelist */ -%#define MD_NR_DEL 0x00000008U /* reset OK flag, set DEL */ -%#define MD_NR_OK 0x80000000U /* set OK flag; reset ADD */ -#endif /* RPC_HDR */ - -struct md_mnnode_record { - u_int nr_revision; /* revision level */ - u_int nr_flags; /* state flags */ - mddb_recid_t nr_selfid; /* db record id */ - md_timeval32_t nr_ctime; /* creation timestamp */ - u_long nr_genid; /* generation id */ - md_mnnode_record *nr_next; /* next ptr (Incore) */ - mddb_recid_t nr_nextrec; /* next node rec id */ - u_int nr_nodeid; /* node id */ - md_node_nm_t nr_nodename; /* node name */ - -}; - -const MD_MNNODE_RECORD_REVISION = 0x00000100; - -const MD_SET_RECORD_REVISION = 0x00010000; - -#ifdef RPC_HDR -% -%#define MD_SR_ADD 0x00000001U -%#define MD_SR_DEL 0x00000002U -%#define MD_SR_CHECK 0x00000004U -%#define MD_SR_CVT 0x00000008U -%#define MD_SR_LOCAL 0x00000010U -%#define MD_SR_UNRSLV_REPLICATED 0x08000000U -%#define MD_SR_MB_DEVID 0x10000000U -%#define MD_SR_AUTO_TAKE 0x20000000U -%#define MD_SR_MN 0x40000000U -%#define MD_SR_OK 0x80000000U -%#define MD_SR_STATE_FLAGS (MD_SR_ADD | \ -% MD_SR_DEL | \ -% MD_SR_CHECK | \ -% MD_SR_CVT | \ -% MD_SR_UNRSLV_REPLICATED | \ -% MD_SR_OK) -#endif /* RPC_HDR */ - -#if !defined(_KERNEL) -struct md_set_record { - u_int sr_revision; /* revision level */ - u_int sr_flags; /* state flags */ - mddb_recid_t sr_selfid; /* db record id */ -#ifdef RPC_HDR - md_set_record *sr_next; /* next ptr (Incore) */ -#endif /* RPC_HDR */ - set_t sr_setno; /* set number */ - md_set_nm_t sr_setname; /* setname */ - md_timeval32_t sr_ctime; /* creation timestamp */ - u_long sr_genid; /* generation id */ - md_node_nm_arr_t sr_nodes; /* array of nodenames */ - md_drive_record *sr_drivechain; /* dr list (Incore) */ - mddb_recid_t sr_driverec; /* first dr record id */ - mhd_mhiargs_t sr_mhiargs; /* MH ioctl timeouts */ - md_h_arr_t sr_med; /* Mediator hosts */ -}; -#else /* _KERNEL */ -struct md_set_record { - u_int sr_revision; /* revision level */ - u_int sr_flags; /* state flags */ - mddb_recid_t sr_selfid; /* db record id */ -#ifdef RPC_HDR - u_int sr_next; /* next ptr (Incore) */ -#endif /* RPC_HDR */ - set_t sr_setno; /* set number */ - md_set_nm_t sr_setname; /* setname */ - md_timeval32_t sr_ctime; /* creation timestamp */ - u_int sr_genid; /* generation id */ - md_node_nm_arr_t sr_nodes; /* array of nodenames */ - u_int sr_drivechain; /* dr list (Incore) */ - mddb_recid_t sr_driverec; /* first dr record id */ - mhd_mhiargs_t sr_mhiargs; /* MH ioctl timeouts */ - md_h_arr_t sr_med; /* Mediator hosts */ -}; -#endif /* !_KERNEL */ - -struct md_mnset_record { - u_int sr_revision; /* revision level */ - u_int sr_flags; /* state flags */ - mddb_recid_t sr_selfid; /* db record id */ -#ifdef RPC_HDR - md_set_record *sr_next; /* next ptr (Incore) */ -#endif /* RPC_HDR */ - set_t sr_setno; /* set number */ - md_set_nm_t sr_setname; /* setname */ - md_timeval32_t sr_ctime; /* creation timestamp */ - u_long sr_genid; /* generation id */ - md_node_nm_arr_t sr_nodes_bw_compat; /* for compat with */ - /* md_set_record, */ - /* first node always */ - /* this node */ - md_drive_record *sr_drivechain; /* dr list (Incore) */ - mddb_recid_t sr_driverec; /* first dr record id */ - mhd_mhiargs_t sr_mhiargs; /* MH ioctl timeouts */ - md_h_arr_t sr_med; /* Mediator hosts */ - md_mnnode_record *sr_nodechain; /* node list (incore) */ - mddb_recid_t sr_noderec; /* first node rec id */ - md_node_nm_t sr_master_nodenm; /* Master nm (incore) */ - u_int sr_master_nodeid; /* Master id (incore) */ - u_int sr_mddb_min_size; /* min size of mddb */ -}; - -#ifdef RPC_HDR -% -%#define MD_SETOWNER_NO 0 -%#define MD_SETOWNER_YES 1 -%#define MD_SETOWNER_NONE 2 -#endif /* RPC_HDR */ - -% -%/* Gate key type */ -struct md_setkey_t { - string sk_host<>; - set_t sk_setno; - string sk_setname<>; - md_timeval32_t sk_key; -#ifdef RPC_HDR - struct md_setkey_t *sk_next; -#endif /* RPC_HDR */ -}; - -% -%/* metadevice ID */ -typedef minor_t unit_t; - -% -%/* component ID */ -struct comp_t { - minor_t mnum; /* associated metadevice */ - md_dev64_t dev; -}; - -% -%/* hotspare pool ID */ -typedef u_int hsp_t; - -#ifdef RPC_HDR -% -%#define MD_HSP_NONE ((hsp_t)~0U) -#endif /* RPC_HDR */ - -% -%/* hotspare ID */ -struct hs_t { - hsp_t hsp; /* associated hotspare pool */ - md_dev64_t dev; /* device ID */ -}; - -% -%/* mnum or hsp */ -typedef u_int minor_or_hsp_t; - -% -%/* -% * name service stuff -% */ -const MD_MAXPREFIX = 127; -% -%#define MD_MAX_CTDLEN 64 - -struct md_name_prefix { - u_char pre_len; - char pre_data[MD_MAXPREFIX]; -}; - -const MD_MAXSUFFIX = 40; -% -struct md_name_suffix { - u_char suf_prefix; - u_char suf_len; - char suf_data[MD_MAXSUFFIX]; -}; - -struct md_splitname { - md_name_prefix sn_prefix; - md_name_suffix sn_suffix; -}; - -#ifdef RPC_HDR -% -%#define SPN_PREFIX(spn) ((spn)->sn_prefix) -%#define SPN_SUFFIX(spn) ((spn)->sn_suffix) -#endif /* RPC_HDR */ - -% -%/* -% * Number of bits to represent a setno -% * this gives us all info to define masks and shifts ... -% * Also used for minor #, hsp id, recid mask and shifts. -% */ -const MD_BITSSET = 5; -const MD_DEFAULTSETS = 4; -% -#ifdef RPC_HDR -% -%#define MD_MAXSETS (1 << MD_BITSSET) -%#define MD_SETMASK (MD_MAXSETS - 1) -#endif /* RPC_HDR */ - -% -%/* -% * Define a file descriptor for lockfd -% * when the lock is not held. -% */ -const MD_NO_LOCK = -2; - -% -%/* -% * accumulated setname -% */ -struct mdsetname_t { - string setname<>; /* logical name */ - set_t setno; /* set number */ -#ifdef RPC_HDR - struct md_set_desc *setdesc; /* Cache set/drive desc */ - int lockfd; /* used by meta_lock_* */ -#endif /* RPC_HDR */ -}; - -struct mdsetnamelist_t { - mdsetnamelist_t *next; - mdsetname_t *sp; -}; - -% -%/* -% * device name -% */ -#ifdef RPC_HDR -%#define MD_FULLNAME_ONLY 0x0 -%#define MD_BASICNAME_OK 0x1 -%#define MD_BYPASS_DAEMON 0x2 -% -%#define MD_SLICE0 0 -%#define MD_SLICE6 6 -%#define MD_SLICE7 7 -% -%#define MD_MAX_PARTS 17 -#endif /* RPC_HDR */ - -struct mdname_t { -#ifdef RPC_HDR - struct mddrivename_t *drivenamep; /* back pointer to drive */ -#endif /* RPC_HDR */ - string cname<>; /* cannonical name */ - string bname<>; /* block name */ - string rname<>; /* raw name */ - string devicesname<>; /* /devices name (or NULL) */ - string minor_name<>; /* minor name with respect to devid */ - md_dev64_t dev; /* major/minor (or NODEV64) */ -#ifdef RPC_HDR - mdkey_t key; /* namespace key (or MD_KEYBAD) */ -#endif /* RPC_HDR */ - diskaddr_t end_blk; /* end of database replicas (or -1) */ - diskaddr_t start_blk; /* usable start block (or -1) */ -}; - -%/* name structure (old style) */ -struct o_mdname_t { -#ifdef RPC_HDR - struct o_mddrivename_t *drivenamep; /* back pointer to drive */ -#endif /* RPC_HDR */ - string cname<>; /* cannonical name */ - string bname<>; /* block name */ - string rname<>; /* raw name */ - string devicesname<>; /* /devices name (or NULL) */ - dev_t dev; /* major/minor (or NODEV64) */ -#ifdef RPC_HDR - mdkey_t key; /* namespace key (or MD_KEYBAD) */ -#endif /* RPC_HDR */ - daddr_t end_blk; /* end of database replicas (or -1) */ - daddr_t start_blk; /* usable start block (or -1) */ -}; - -struct mdnamelist_t { - mdnamelist_t *next; - mdname_t *namep; -}; - -% -%/* -% * drive name -% */ -%/* name types */ -enum mdnmtype_t { - MDT_UNKNOWN = 0, /* unknown type */ - MDT_ACCES, /* could not access device */ - MDT_META, /* metadevice name */ - MDT_COMP, /* regular device name */ - MDT_FAST_META, /* metadevice name (partial) */ - MDT_FAST_COMP /* regular device name (partial) */ -}; - -%/* metadevice types */ -enum md_types_t { - MD_UNDEFINED = 0, - MD_DEVICE, - MD_METAMIRROR, - MD_METATRANS, - MD_METARAID, - MD_METASP -}; - -%/* SVM general device types -% * -% * META_DEVICE refers to any SVM metadevice -% * LOGICAL_DEVICE refers to any underlying physical device -% * HSP_DEVICE refers to a hotspare pool -% * -% * In the past, the device type can be determined via -% * the device name (such as d10, c1t1d1s1). With -% * the friendly name implementation, it is not possible -% * to determine from the device name. In the code, -% * whereever the device type is obvious that type will be -% * used explicitly otherwise 'UNKNOWN' will be used and -% * specific SVM lookup routines will be called to determine -% * the device type associated with the name. -% */ -enum meta_device_type_t { - UNKNOWN = 0, - META_DEVICE, - HSP_DEVICE, - LOGICAL_DEVICE -}; - -#ifdef RPC_HDR -% -%/* misc module names */ -%/* When modifying this list also update meta_names in md_names.c */ -%#define MD_STRIPE "md_stripe" -%#define MD_MIRROR "md_mirror" -%#define MD_TRANS "md_trans" -%#define MD_HOTSPARES "md_hotspares" -%#define MD_RAID "md_raid" -%#define MD_VERIFY "md_verify" -%#define MD_SP "md_sp" -%#define MD_NOTIFY "md_notify" -#endif /* RPC_HDR */ - -%/* generic device info */ -struct mdgeom_t { - u_int ncyl; - u_int nhead; - u_int nsect; - u_int rpm; - u_int write_reinstruct; - u_int read_reinstruct; - u_int blk_sz; -}; - -%/* generic device info (old style) */ -struct o_mdgeom_t { - u_int ncyl; - u_int nhead; - u_int nsect; - u_int rpm; - u_int write_reinstruct; - u_int read_reinstruct; -}; - -struct mdcinfo_t { - char cname[16]; /* controller driver name */ - mhd_ctlrtype_t ctype; /* controller type */ - u_int cnum; /* controller instance */ - u_int tray; /* SSA100 tray */ - u_int bus; /* SSA100 bus */ - u_longlong_t wwn; /* SSA100 World Wide Name */ - char dname[16]; /* disk driver name */ - u_int unit; /* disk instance */ - u_int maxtransfer; /* max I/O size (in blocks) */ -}; - -struct mdpart_t { - diskaddr_t start; /* start block */ - diskaddr_t size; /* size of partition (in blocks) */ - u_short tag; /* ID tag of partition */ - u_short flag; /* permission flags */ - diskaddr_t label; /* size of disk label (or 0) */ -}; - -%/* partition information (old style) */ -struct o_mdpart_t { - daddr_t start; /* start block */ - daddr_t size; /* size of partition (in blocks) */ - u_short tag; /* ID tag of partition */ - u_short flag; /* permission flags */ - daddr_t label; /* size of disk label (or 0) */ -}; - -struct mdvtoc_t { - u_int nparts; - diskaddr_t first_lba; /* for efi devices only */ - diskaddr_t last_lba; /* for efi devices only */ - diskaddr_t lbasize; /* for efi devices only */ - mdpart_t parts[MD_MAX_PARTS]; /* room for i386 too */ - char *typename; /* disk type (or NULL) */ -}; - -%/* vtoc information (old style) */ -struct o_mdvtoc_t { - char *typename; /* disk type (or NULL) */ - u_int nparts; - o_mdpart_t parts[16]; /* room for i386 too */ -}; -% -%/* -% * List of drivename cnames per side, -% * also the driver name, mnum (for slice 7). -% */ -struct mdsidenames_t { - mdsidenames_t *next; - side_t sideno; - minor_t mnum; - string dname<>; - string cname<>; -}; - -struct mddrivename_t { -#ifdef RPC_HDR - /* - * the following string is not used but is left in place so that - * it is not necessary to version the rpc interface that passes - * this structure. - */ - string not_used<>; -#endif /* RPC_HDR */ - string cname<>; /* canonical drive name */ - string rname<>; /* raw name */ - mdnmtype_t type; /* type of drive */ - string devid<>; /* Device Id of the drive */ - int errnum; /* errno for type == MDT_ACCES */ - mdgeom_t geom; /* disk geometry */ - mdcinfo_t cinfo; /* controller info */ - mdvtoc_t vtoc; /* volume table of contents info */ - mdname_t parts<>; /* partitions in drive */ - mdsidenames_t *side_names; /* list of names per side */ - mdkey_t side_names_key; /* key used to store the side names*/ - - string miscname<>; /* metadevice misc name */ -#ifdef RPC_HDR - struct md_common_t *unitp; /* metadevice unit structure */ -#endif /* RPC_HDR */ -}; - -%/* -% * old version of mddrivename_t that contains an old version of mdgeom_t, -% * mdvtoc_t and mdname_t (prefixed _o). -% */ -struct o_mddrivename_t { -#ifdef RPC_HDR - string cachenm<>; /* name used for cache lookups */ -#endif /* RPC_HDR */ - string cname<>; /* canonical drive name */ - string rname<>; /* raw name */ - mdnmtype_t type; /* type of drive */ - int errnum; /* errno for type == MDT_ACCES */ - o_mdgeom_t geom; /* disk geometry (old style) */ - mdcinfo_t cinfo; /* controller info */ - o_mdvtoc_t vtoc; /* vtoc info (old style) */ - o_mdname_t parts<>; /* partitions in drive (old style) */ - mdsidenames_t *side_names; /* list of names per side */ - mdkey_t side_names_key; /* key used to store the side names*/ - - string miscname<>; /* metadevice misc name */ -#ifdef RPC_HDR - struct md_common_t *unitp; /* metadevice unit structure */ -#endif /* RPC_HDR */ -}; -struct mddrivenamelist_t { - mddrivenamelist_t *next; - mddrivename_t *drivenamep; -}; - -% -%/* -% * replica struct -% */ -typedef u_int replica_flags_t; -#ifdef RPC_HDR -% -%#define MDDB_F_EREAD 0x00001 /* a read error occurred */ -%#define MDDB_F_TOOSMALL 0x00002 /* replica is too small to hold db */ -%#define MDDB_F_EFMT 0x00004 /* something is wrong with the data */ -%#define MDDB_F_EDATA 0x00008 /* error in data blocks */ -%#define MDDB_F_EMASTER 0x00010 /* error in master block(s) */ -%#define MDDB_F_ACTIVE 0x00020 /* this replica is currently in use */ -%#define MDDB_F_EWRITE 0x00040 /* a write error occurred */ -%#define MDDB_F_MASTER 0x00080 /* the copy which was used as input */ -%#define MDDB_F_SUSPECT 0x00100 /* replica write ability is suspect */ -%#define MDDB_F_PTCHED 0x00400 /* db location was patched in kernel */ -%#define MDDB_F_IOCTL 0x00800 /* db location passed in from ioctl */ -%#define MDDB_F_GOTTEN 0x01000 /* getdev has been done on this dev */ -%#define MDDB_F_LOCACC 0x02000 /* the locator has been accessed */ -%#define MDDB_F_UP2DATE 0x04000 /* this copy of db is up to date */ -%#define MDDB_F_OLDACT 0x08000 /* this copy was active previously */ -%#define MDDB_F_DELETED 0x10000 /* place holder in empty slot */ -%#define MDDB_F_TAGDATA 0x20000 /* Data is tagged */ -%#define MDDB_F_BADTAG 0x40000 /* Data tag was not valid */ -%#define MDDB_F_NODEVID 0x80000 /* No devid associated with replica */ -% -%/* -% * These are used in de_flags only -% * Do not change these values, they are stored on-disk -% */ -%#define MDDB_F_STRIPE 0x00001 /* record is a stripe record */ -%#define MDDB_F_MIRROR 0x00002 /* record is a mirror record */ -%#define MDDB_F_RAID 0x00004 /* record is a raid record */ -%#define MDDB_F_SOFTPART 0x00008 /* record is a sp record */ -%#define MDDB_F_TRANS_MASTER 0x00010 /* trans master record */ -%#define MDDB_F_TRANS_LOG 0x00020 /* trans log record */ -%#define MDDB_F_HOTSPARE 0x00040 /* hotspare record */ -%#define MDDB_F_HOTSPARE_POOL 0x00080 /* hotspare record */ -%#define MDDB_F_OPT 0x00200 /* optimization record */ -%#define MDDB_F_CHANGELOG 0x00400 /* change log record */ - -%/* used by metadb(1m) for printing */ -%#define MDDB_FLAGS_STRING "RSFDMaWm pc luo tBr" -%#define MDDB_FLAGS_LEN (strlen(MDDB_FLAGS_STRING)) -% -%/* -% * See meta_prbits() in SUNWmd/lib/libmeta/meta_print.c for a description of -% * the way this is used -% */ -%#define MDDB_F_BITNAMES "\020\001EREAD\002TOOSMALL\003EFMT\004EDATA" \ -% "\005EMASTER\006ACTIVE\007EWRITE\010MASTER" \ -% "\011SUSPECT\012OPT\013PTCHED\014IOCTL" \ -% "\015GOTTEN\016LOCACC\017UP2DATE\020OLDACT" \ -% "\021DELETED\022TAGDATA\023BADTAG\024NORELOC" -% -#endif /* RPC_HDR */ - -/* - * Refering to r_blkno and r_nblk: - * A replica will always be smaller than 1 Terabyte, so no need to - * change the ondisk structure to 64 bits. - */ -struct md_replica_t { - mdname_t *r_namep; - replica_flags_t r_flags; - daddr_t r_blkno; - daddr_t r_nblk; - ddi_devid_t r_devid; - char r_driver_name[MD_MAXDRVNM]; - char r_minor_name[MDDB_MINOR_NAME_MAX]; -}; - -struct md_replica_recerr_t { - int r_li; - int r_flags; - daddr32_t r_blkno; - minor_t r_mnum; - char r_driver_name[MD_MAXDRVNM]; -}; - -struct md_replicalist_t { - md_replicalist_t *rl_next; - md_replica_t *rl_repp; -}; - -% -%/* -% * set/drive structs exposed by the library routines -% */ -struct md_drive_desc { - md_timeval32_t dd_ctime; /* creation time */ - u_long dd_genid; /* generation id */ - u_int dd_flags; /* state flags */ - md_drive_desc *dd_next; /* next drive */ - mddrivename_t *dd_dnp; /* drive name ptr */ - int dd_dbcnt; /* # of replicas */ - int dd_dbsize; /* size of replica */ -}; - -% -%/* -% * set/drive structs exposed by the library routines (old style) -% */ -struct o_md_drive_desc { - md_timeval32_t dd_ctime; /* creation time */ - u_long dd_genid; /* generation id */ - u_int dd_flags; /* state flags */ - o_md_drive_desc *dd_next; /* next drive */ - o_mddrivename_t *dd_dnp; /* drive name ptr */ - int dd_dbcnt; /* # of replicas */ - int dd_dbsize; /* size of replica */ -}; - -struct md_mnnode_desc { - md_timeval32_t nd_ctime; /* creation time */ - u_long nd_genid; /* generation id */ - u_int nd_flags; /* state flags */ - md_mnnode_desc *nd_next; /* next node */ - md_mnnode_nm_t nd_nodename; /* name of node */ - u_int nd_nodeid; /* id of node */ - md_mnnode_nm_t nd_priv_ic; /* priv interconnect */ - /* nodename */ -}; - -struct md_set_desc { - md_timeval32_t sd_ctime; /* creation time */ - u_long sd_genid; /* generation id */ - set_t sd_setno; /* set number */ - u_int sd_flags; /* state flags */ - md_node_nm_arr_t sd_nodes; /* array of nodenames */ - /* for !MN_disksets */ - int sd_isown[MD_MAXSIDES]; /* bool for is owner? */ - md_h_arr_t sd_med; /* Mediator hosts */ - md_drive_desc *sd_drvs; /* drive list */ - u_int sd_mn_am_i_master; - u_int sd_mn_numnodes; /* # of nodes in list */ - md_mnnode_desc *sd_nodelist; /* MN node list */ - /* for MN_disksets */ - md_node_nm_t sd_mn_master_nodenm; /* Master node name */ - u_int sd_mn_master_nodeid; /* Master node id */ - md_mnnode_desc *sd_mn_mynode; /* shortcut to me */ - md_mnnode_desc *sd_mn_masternode; /* shortcut to master */ -}; - -%/* -% * Defines to determine if diskset is a Multinode diskset. -% * The sd_flags field in the md_set_desc structure is never manipulated -% * directly but is always a copy of the set record's sr_flags field, so -% * the same define (MD_SR_MN) can be used for both sd_flags and sr_flags. -% * The set record is of the structure type md_set_record if a regular diskset -% * or type md_mnset_record for a Multinode diskset. -%*/ -%#define MD_MNSET_DESC(sd) (((sd)->sd_flags & MD_SR_MN) ? 1 : 0) -%#define MD_MNSET_REC(sr) (((sr)->sr_flags & MD_SR_MN) ? 1 : 0) -%#define MD_MNDR_REC(dr) (((dr)->dr_flags & MD_DR_MN) ? 1 : 0) - -%/* -% * Define to determine if diskset is a Auto-Take diskset. -%*/ -%#define MD_ATSET_DESC(sd) (((sd)->sd_flags & MD_SR_AUTO_TAKE) ? 1 : 0) - -%/* -% * Define to set the alive flag for a node. A node is alive if it -% * is in the multi_node membership list. -% */ -%#define MD_MN_NODE_ALIVE 0x0001 - -%/* -% * Define to set the own flag for a node. A node is an owner of the diskset -% * if that node has snarf'd in the mddb. -% */ -%#define MD_MN_NODE_OWN 0x0002 - -%/* -% * Defines to set the add, delete and ok states of a node. The add state is -% * set at the beginning of the addition of a node to a diskset. The -% * delete state is set at the beginning of a deletion of a node from a diskset. -% * The OK state is set (and the add state reset) when that node is -% * functional in the diskset. -% * Rollback join flag is used on an error condition when deleting the last -% * disk from a diskset. rpc.metad should never see this flag. -% * NOSET flag is used on an error condition during a reconfig cycle when -% * the set has been removed from this node. rpc.metad should just ignore -% * this flag. -% */ -%#define MD_MN_NODE_ADD 0x0004 -%#define MD_MN_NODE_DEL 0x0008 -%#define MD_MN_NODE_OK 0x0010 -%#define MD_MN_NODE_RB_JOIN 0x0020 -%#define MD_MN_NODE_NOSET 0x0040 - -%/* -% * Define for invalid node id. Used specifically to set mn set record -% * master nodeid to invalid when no master can be determined. -% */ -%#define MD_MN_INVALID_NID 0xfffffffful /* invalid node id */ - -% -%/* -% * set description (old style) -% */ -struct o_md_set_desc { - md_timeval32_t sd_ctime; /* creation time */ - u_long sd_genid; /* generation id */ - set_t sd_setno; /* set number */ - u_int sd_flags; /* state flags */ - md_node_nm_arr_t sd_nodes; /* array of nodenames */ - int sd_isown[MD_MAXSIDES]; /* bool for is owner? */ - md_h_arr_t sd_med; /* Mediator hosts */ - o_md_drive_desc *sd_drvs; /* drive list */ -}; - -% -%/* -% * hotspare pool name -% */ -struct mdhspname_t { - string hspname<>; /* hotspare pool name */ - hsp_t hsp; /* number */ - -#ifdef RPC_HDR - struct md_hsp_t *unitp; /* hotspare pool unit structure */ -#endif /* RPC_HDR */ -}; - -struct mdhspnamelist_t { - mdhspnamelist_t *next; - mdhspname_t *hspnamep; -}; - -% -%/* -% * generic metadevice descriptions for status and init -% */ -% - -%/* -% * following used with un_status -% * bottom 16 bits are global definitions -% * top 16 bits are defined by sub device -% */ -typedef u_int md_status_t; -#ifdef RPC_HDR -% -%#define MD_UN_GROW_PENDING 0x0008 /* grow mirror pending */ -%#define MD_UN_BEING_RESET 0x0040 /* reset at snarf time */ -#endif /* RPC_HDR */ -% -%/* -% * following are used with un_parent -% * MD_NO_PARENT - Not a sub-device. -% * MD_MULTI_PARENT - A sub-device with one or more parents, like a log. -% * other - A sub-device with only one parent, like a submirror. -% * The number is the parent's unit number. -% */ -typedef unit_t md_parent_t; -#ifdef RPC_HDR -% -%#define MD_NO_PARENT 0xffffffffu -%#define MD_MULTI_PARENT 0xfffffffeu -%#define MD_HAS_PARENT(p) ((p) != MD_NO_PARENT) -#endif /* RPC_HDR */ - -typedef u_int md_stackcap_t; -#ifdef RPC_HDR -% -%#define MD_CANT_PARENT 0x00 /* cannot have a parent */ -%#define MD_CAN_PARENT 0x01 /* can have a parent */ -%#define MD_CAN_SUB_MIRROR 0x02 /* can be a sub-mirror */ -%#define MD_CAN_META_CHILD 0x04 /* can have metadev. children */ -%#define MD_CAN_SP 0x08 /* can be soft partitioned */ - -#endif /* RPC_HDR */ - -/* common to all metadevices */ -struct md_common_t { - mdname_t *namep; - md_types_t type; - md_status_t state; - md_stackcap_t capabilities; - md_parent_t parent; - diskaddr_t size; - u_long user_flags; - u_longlong_t revision; -}; - -% -%/* -% * stripe -% */ -/* - * ioctl stuff - */ -struct ms_params_t { - int change_hsp_id; - hsp_t hsp_id; -}; - -/* - * unit structure - */ -typedef u_int comp_state_t; -#ifdef RPC_HDR -% -%#define CS_OKAY 0x0001 -%#define CS_ERRED 0x0002 -%#define CS_RESYNC 0x0004 -%#define CS_LAST_ERRED 0x0008 -% -%/* interlace values (in bytes) */ -%#define MININTERLACE (16 * 512) -%#define MAXINTERLACE (100 * 1024 * 1024) -#endif /* RPC_HDR */ - -struct md_comp_t { - mdname_t *compnamep; - mdname_t *hsnamep; - comp_state_t state; - u_int lasterrcnt; - md_timeval32_t timestamp; -}; - -struct md_row_t { - diskaddr_t interlace; - diskaddr_t row_size; - md_comp_t comps<>; -}; - -struct md_stripe_t { - md_common_t common; - mdhspname_t *hspnamep; - md_row_t rows<>; -}; - -% -%/* -% * soft partition -% */ -typedef uint64_t xsp_offset_t; -typedef uint64_t xsp_length_t; -typedef u_int xsp_status_t; -% -%#define SP_INIT 0x0001 -%#define SP_OK 0x0002 -%#define SP_LASTERR 0x0004 -% -/* - * unit structure - */ - -struct md_sp_ext_t { - xsp_offset_t voff; - xsp_offset_t poff; - xsp_length_t len; -}; - -struct md_sp_t { - md_common_t common; - mdname_t *compnamep; /* name of this component */ - xsp_status_t status; /* state of this soft partition */ - md_sp_ext_t ext<>; -}; - -% -%/* -% * mirror -% */ -/* - * ioctl stuff - */ -enum mm_wr_opt_t { - WR_PARALLEL = 0, /* write submirrors in parallel */ - WR_SERIAL /* write submirrors one at a time */ -}; - -enum mm_rd_opt_t { - RD_LOAD_BAL = 0, /* read submirrors roundrobin */ - RD_GEOMETRY, /* read submirrors geometrically */ - RD_FIRST /* read first submirror */ -}; - -typedef short mm_pass_num_t; -const MD_PASS_DEFAULT = 1; -const MD_PASS_MAX = 9; - -struct mm_params_t { - int change_read_option; - mm_rd_opt_t read_option; - int change_write_option; - mm_wr_opt_t write_option; - int change_pass_num; - mm_pass_num_t pass_num; -}; - -/* - * unit structure - */ -typedef u_int sm_state_t; -#ifdef RPC_HDR -% -%#define SMS_UNUSED 0x0000 -%#define SMS_RUNNING 0x0001 -%#define SMS_COMP_ERRED 0x0002 -%#define SMS_COMP_RESYNC 0x0004 -%#define SMS_ATTACHED 0x0008 -%#define SMS_ATTACHED_RESYNC 0x0010 -%#define SMS_OFFLINE 0x0020 -%#define SMS_OFFLINE_RESYNC 0x0040 -%#define SMS_ALL_ERRED 0x0080 -%#define SMS_INUSE (0xffff) -%#define SMS_LIMPING (SMS_COMP_ERRED | SMS_COMP_RESYNC) -%#define SMS_IGNORE 0x4000 -#endif /* RPC_HDR */ - -typedef u_int sm_flags_t; -#ifdef RPC_HDR -% -%#define MD_SM_RESYNC_TARGET 0x0001 -%#define MD_SM_FAILFAST 0x0002 -#endif /* RPC_HDR */ - -struct md_submirror_t { - mdname_t *submirnamep; - sm_state_t state; - sm_flags_t flags; - md_timeval32_t timestamp; -}; - -#ifdef RPC_HDR -% -%#define MD_UN_RESYNC_ACTIVE 0x00010000 -%#define MD_UN_WAR 0x00020000 -%#define MD_UN_OFFLINE_SM 0x00040000 -%#define MD_UN_OPT_NOT_DONE 0x00080000 -%#define MD_UN_KEEP_DIRTY (MD_UN_OFFLINE_SM | MD_UN_OPT_NOT_DONE) -%#define MD_UN_RESYNC_CANCEL 0x00100000 -%#define MD_UN_REPLAYED 0x00200000 -%#define MD_UN_RENAMING 0x00400000 -%#define MD_UN_MOD_INPROGRESS (MD_UN_RESYNC_ACTIVE | \ -% MD_UN_OPT_NOT_DONE | \ -% MD_UN_RENAMING) -#endif /* RPC_HDR */ - -const NMIRROR = 4; -struct md_mirror_t { - md_common_t common; - mm_rd_opt_t read_option; - mm_wr_opt_t write_option; - mm_pass_num_t pass_num; - int percent_done; - int percent_dirty; - md_submirror_t submirrors[NMIRROR]; -}; - - -% -%/* -% * trans -% */ -%/* -% * unit structure -% */ -typedef u_int mt_flags_t; -#ifdef RPC_HDR -% -%#define TRANS_NEED_OPEN 0x0001 /* subdevs are unopened */ -%#define TRANS_OPENED 0x0002 /* open at snarf succeeded */ -%#define TRANS_DETACHING 0x0004 /* detaching the log */ -%#define TRANS_DETACHED 0x0008 /* log successfully detached */ -%#define TRANS_DETACH_SKIP 0x0010 /* already processed; skip */ -%#define TRANS_ATTACHING 0x0020 /* attaching the log */ -%#define TRANS_ROLL_ON_WRITE 0x0040 /* roll on physio write */ -%#define TRANS_NEED_SCANROLL 0x0080 /* roll on physio write */ -#endif /* RPC_HDR */ - -typedef u_int mt_l_error_t; -#ifdef RPC_HDR -% -%#define LDL_ERROR 0x0001 /* error state */ -%#define LDL_HERROR 0x0002 /* hard error state */ -%#define LDL_ANYERROR 0x0003 /* any error state */ -%#define LDL_NOERROR 0x0004 /* dont error transition during scan */ -%#define LDL_SAVERROR 0x0008 /* transition to error after scan */ -#endif /* RPC_HDR */ - -typedef u_int mt_debug_t; /* values in md_trans.h */ - -struct md_trans_t { - md_common_t common; - mdname_t *masternamep; - mdname_t *lognamep; - mt_flags_t flags; - md_timeval32_t timestamp; - mt_l_error_t log_error; - md_timeval32_t log_timestamp; - daddr_t log_size; - mt_debug_t debug; -}; - - - -% -%/* -% * RAID -% */ -/* - * ioctl stuff - */ -struct mr_params_t { - int change_hsp_id; - hsp_t hsp_id; -}; - -/* - * unit structure - */ -enum rcs_state_t { - RCS_UNUSED = 0x0, - RCS_INIT = 0x1, - RCS_OKAY = 0x2, - RCS_ERRED = 0x4, - RCS_LAST_ERRED = 0x8, - RCS_RESYNC = 0x10, - RCS_INIT_ERRED = 0x20, - RCS_REGEN = 0x40 -}; - -typedef u_int rcs_flags_t; -#ifdef RPC_HDR -% -%#define MD_RAID_DEV_ISOPEN 0x00001 -%#define MD_RAID_ALT_ISOPEN 0x00002 -%#define MD_RAID_RESYNC 0x00004 -%#define MD_RAID_RESYNC_ERRED 0x00008 -%#define MD_RAID_FORCE_REPLACE 0x00010 -%#define MD_RAID_WRITE_ALT 0x00020 -%#define MD_RAID_DEV_ERRED 0x00040 -%#define MD_RAID_COPY_RESYNC 0x00080 -%#define MD_RAID_REGEN_RESYNC 0x00100 -%#define MD_RAID_DEV_PROBEOPEN 0x00200 -%#define MD_RAID_HAS_LABEL 0x40000 -#endif /* RPC_HDR */ - -struct md_raidcol_t { - mdname_t *colnamep; - mdname_t *hsnamep; - rcs_state_t state; - rcs_flags_t flags; - md_timeval32_t timestamp; -}; - -enum rus_state_t { - RUS_UNUSED = 0x0, - RUS_INIT = 0x1, - RUS_OKAY = 0x2, - RUS_ERRED = 0x4, - RUS_LAST_ERRED = 0x8, - RUS_DOI = 0x10, - RUS_REGEN = 0x20 -}; - -typedef u_int md_riflags_t; -#ifdef RPC_HDR -% -%#define MD_RI_INPROGRESS 0x0001 -%#define MD_GROW_INPROGRESS 0x0002 -%#define MD_RI_BLOCK 0x0004 -%#define MD_RI_UNBLOCK 0x0008 -%#define MD_RI_KILL 0x0010 -%#define MD_RI_BLOCK_OWNER 0x0020 -%#define MD_RI_SHUTDOWN 0x0040 -%#define MD_RI_NO_WAIT 0x0080 -%#define MD_RI_RESYNC_FORCE_MNSTART 0x0100 -#endif /* RPC_HDR */ - -const MD_RAID_MIN = 3; -struct md_raid_t { - md_common_t common; - rus_state_t state; - md_timeval32_t timestamp; - diskaddr_t interlace; - diskaddr_t column_size; - size_t orig_ncol; - mdhspname_t *hspnamep; - md_riflags_t resync_flags; - int percent_dirty; - int percent_done; - int pw_count; - md_raidcol_t cols<>; -}; - -% -%/* -% * shared -% */ -/* - * unit structure - */ -struct md_shared_t { - md_common_t common; -}; - -% -%/* -% * hotspare -% */ -/* - * ioctl stuff - */ -enum hotspare_states_t { - HSS_UNUSED, HSS_AVAILABLE, HSS_RESERVED, HSS_BROKEN -}; - -/* - * unit structure - */ -struct md_hs_t { - mdname_t *hsnamep; - hotspare_states_t state; - diskaddr_t size; - md_timeval32_t timestamp; - u_longlong_t revision; -}; - -struct md_hsp_t { - mdhspname_t *hspnamep; - u_int refcount; - md_hs_t hotspares<>; -}; - -% -%/* -% * specific error info -% */ -% -%/* -% * simple errors -% */ -enum md_void_errno_t { - MDE_NONE = 0, - MDE_UNIT_NOT_FOUND, - MDE_DUPDRIVE, - MDE_INVAL_HSOP, - MDE_NO_SET, /* no such set */ - MDE_SET_DIFF, /* setname changed on command line */ - MDE_BAD_RD_OPT, /* bad mirror read option */ - MDE_BAD_WR_OPT, /* bad mirror write option */ - MDE_BAD_PASS_NUM, /* bad mirror pass number */ - MDE_BAD_INTERLACE, /* bad stripe interlace */ - MDE_NO_HSPS, /* couldn't find any hotspare pools */ - MDE_NOTENOUGH_DB, /* Too few replicas */ - MDE_DELDB_NOTALLOWED, /* last replica in ds cannot be del in metadb */ - MDE_DEL_VALIDDB_NOTALLOWED, /* last valid replica cannot be del */ - MDE_SYSTEM_FILE, /* /etc/system file error */ - MDE_MDDB_FILE, /* /etc/lvm/mddb.cf file error */ - MDE_MDDB_CKSUM, /* /etc/lvm/mddb.cf checksum error */ - MDE_VFSTAB_FILE, /* /etc/vfstab file error */ - MDE_NOSLICE, /* metaslicename() with sliceno to big */ - MDE_SYNTAX, /* metainit syntax error */ - MDE_OPTION, /* metainit options error */ - MDE_TAKE_OWN, /* take ownership failed */ - MDE_NOT_DRIVENAME, /* not in drivename syntax */ - MDE_RESERVED, /* device is reserved by another host */ - MDE_DVERSION, /* driver version out of sync */ - MDE_MVERSION, /* MDDB version out of sync */ - MDE_TESTERROR, /* Test Error Message */ - MDE_BAD_ORIG_NCOL, /* bad RAID original column count */ - MDE_RAID_INVALID, /* attempt to use -k on invalid device */ - MDE_MED_ERROR, /* mediator error */ - MDE_TOOMANYMED, /* Too many mediators specified */ - MDE_NOMED, /* No mediators */ - MDE_ONLYNODENAME, /* Only the nodename is needed */ - MDE_RAID_BAD_PW_CNT, /* bad prewrite count specified */ - MDE_DEVID_TOOBIG, /* Devid size is greater than allowed */ - MDE_NOPERM, /* No permission - not root */ - MDE_NODEVID, /* No device id for given devt */ - MDE_NOROOT, /* No root in /etc/mnttab */ - MDE_EOF_TRANS, /* trans logging eof'd */ - MDE_BAD_RESYNC_OPT, /* bad mirror resync option */ - MDE_NOT_MN, /* option only valid within a multi-node set */ - MDE_ABR_SET, /* invalid operation for ABR mirror */ - MDE_INVAL_MNOP, /* Invalid operation on MN diskset */ - MDE_MNSET_NOTRANS, /* Trans metadevice not supported in MN set */ - MDE_MNSET_NORAID, /* RAID metadevice not supported in MN set */ - MDE_FORCE_DEL_ALL_DRV, /* Must use -f flag to delete all drives */ - MDE_STRIPE_TRUNC_SINGLE, /* single component stripe truncation */ - MDE_STRIPE_TRUNC_MULTIPLE, /* multiple component stripe trun */ - MDE_SMF_FAIL, /* service management facility error */ - MDE_SMF_NO_SERVICE, /* service not enabled in SMF */ - MDE_AMBIGUOUS_DEV, /* Ambiguous device specified */ - MDE_NAME_IN_USE, /* Friendly name already in use. For */ - /* instance name desired for hot spare pool */ - /* is being used for a metadevice. */ - MDE_ZONE_ADMIN, /* in a zone & no admin device */ - MDE_NAME_ILLEGAL, /* illegal syntax for metadevice or hsp name */ - MDE_MISSING_DEVID_DISK /* unable to find disk using devid */ -}; - -struct md_void_error_t { - md_void_errno_t errnum; -}; - -% -%/* -% * system errors -% */ -struct md_sys_error_t { - int errnum; -}; - -% -%/* -% * RPC errors -% */ -struct md_rpc_error_t { - enum clnt_stat errnum; -}; - -% -%/* -% * device errors -% */ -enum md_dev_errno_t { - MDE_INVAL_HS = 1, - MDE_FIX_INVAL_STATE, - MDE_FIX_INVAL_HS_STATE, - MDE_NOT_META, - MDE_IS_META, - MDE_IS_SWAPPED, - MDE_NAME_SPACE, - MDE_IN_SHARED_SET, - MDE_NOT_IN_SET, - MDE_NOT_DISK, - MDE_CANT_CONFIRM, - MDE_INVALID_PART, - MDE_HAS_MDDB, - MDE_NO_DB, /* Replica not on device given */ - MDE_CANTVERIFY_VTOC, - MDE_NOT_LOCAL, - MDE_DEVICES_NAME, - MDE_REPCOMP_INVAL, /* replica slice not allowed in "set" metadevs */ - MDE_REPCOMP_ONLY, /* only replica slice diskset replicas */ - MDE_INV_ROOT, /* Invalid root device for this operation */ - MDE_MULTNM, /* Multiple entries for device in namespace */ - MDE_TOO_MANY_PARTS, /* dev has more than MD_MAX_PARTS partitions */ - MDE_REPART_REPLICA, /* replica slice would move with repartitioning */ - MDE_IS_DUMP, /* device already in use as dump device */ - MDE_DISKNAMETOOLONG /* devid's not in use and diskname too long */ -}; - -struct md_dev_error_t { - md_dev_errno_t errnum; - md_dev64_t dev; /* 64 bit fixed size */ -}; - -% -%/* -% * overlap errors -% */ -enum md_overlap_errno_t { - MDE_OVERLAP_MOUNTED = 1, - MDE_OVERLAP_SWAP, - MDE_OVERLAP_DUMP -}; - -% - -#if !defined(_KERNEL) -struct md_overlap_error_t { - md_overlap_errno_t errnum; - string where<>; - string overlap<>; -}; -#else -struct md_overlap_error_t { - md_overlap_errno_t errnum; - u_int xwhere; - u_int xoverlap; -}; -#endif /* !_KERNEL */ - -% -%/* -% * use errors -% */ -enum md_use_errno_t { - MDE_IS_MOUNTED = 1, - MDE_ALREADY, - MDE_OVERLAP, - MDE_SAME_DEVID -}; - -% -#if !defined(_KERNEL) -struct md_use_error_t { - md_use_errno_t errnum; - md_dev64_t dev; - string where<>; -}; -#else -struct md_use_error_t { - md_use_errno_t errnum; - md_dev64_t dev; - u_int xwhere; -}; -#endif - -% -%/* -% * metadevice errors -% */ -enum md_md_errno_t { - MDE_INVAL_UNIT = 1, - MDE_UNIT_NOT_SETUP, - MDE_UNIT_ALREADY_SETUP, - MDE_NOT_MM, - MDE_NOT_ENOUGH_DBS, - MDE_IS_SM, - MDE_IS_OPEN, - MDE_C_WITH_INVAL_SM, - MDE_RESYNC_ACTIVE, - MDE_LAST_SM_RE, - MDE_MIRROR_FULL, - MDE_IN_USE, - MDE_SM_TOO_SMALL, - MDE_NO_LABELED_SM, - MDE_SM_OPEN_ERR, - MDE_CANT_FIND_SM, - MDE_LAST_SM, - MDE_NO_READABLE_SM, - MDE_SM_FAILED_COMPS, - MDE_ILLEGAL_SM_STATE, - MDE_RR_ALLOC_ERROR, - MDE_MIRROR_OPEN_FAILURE, - MDE_MIRROR_THREAD_FAILURE, - MDE_GROW_DELAYED, - MDE_NOT_MT, - MDE_HS_IN_USE, - MDE_HAS_LOG, - MDE_UNKNOWN_TYPE, - MDE_NOT_STRIPE, - MDE_NOT_RAID, - MDE_NROWS, - MDE_NCOMPS, - MDE_NSUBMIRS, - MDE_BAD_STRIPE, - MDE_BAD_MIRROR, - MDE_BAD_TRANS, - MDE_BAD_RAID, - MDE_RAID_OPEN_FAILURE, - MDE_RAID_THREAD_FAILURE, - MDE_RAID_NEED_FORCE, - MDE_NO_LOG, - MDE_RAID_DOI, - MDE_RAID_LAST_ERRED, - MDE_RAID_NOT_OKAY, - MDE_RENAME_BUSY, - MDE_RENAME_SOURCE_BAD, - MDE_RENAME_TARGET_BAD, - MDE_RENAME_TARGET_UNRELATED, - MDE_RENAME_CONFIG_ERROR, - MDE_RENAME_ORDER, - MDE_RECOVER_FAILED, - MDE_NOT_SP, - MDE_SP_NOSPACE, - MDE_SP_BADWMREAD, - MDE_SP_BADWMWRITE, - MDE_SP_BADWMMAGIC, - MDE_SP_BADWMCRC, - MDE_SP_OVERLAP, - MDE_SP_BAD_LENGTH, - MDE_UNIT_TOO_LARGE, - MDE_LOG_TOO_LARGE, - MDE_SP_NOSP, - MDE_IN_UNAVAIL_STATE -}; - -struct md_md_error_t { - md_md_errno_t errnum; - minor_t mnum; -}; - -% -%/* -% * component errors -% */ -enum md_comp_errno_t { - MDE_CANT_FIND_COMP = 1, - MDE_REPL_INVAL_STATE, - MDE_COMP_TOO_SMALL, - MDE_COMP_OPEN_ERR, - MDE_RAID_COMP_ERRED, - MDE_MAXIO, - MDE_SP_COMP_OPEN_ERR -}; - -struct md_comp_error_t { - md_comp_errno_t errnum; - comp_t comp; -}; - -% -%/* -% * hotspare pool errors -% */ -enum md_hsp_errno_t { - MDE_HSP_CREATE_FAILURE = 1, - MDE_HSP_IN_USE, - MDE_INVAL_HSP, - MDE_HSP_BUSY, - MDE_HSP_REF, - MDE_HSP_ALREADY_SETUP, - MDE_BAD_HSP, - MDE_HSP_UNIT_TOO_LARGE -}; - -struct md_hsp_error_t { - md_hsp_errno_t errnum; - hsp_t hsp; -}; - -% -%/* -% * hotspare errors -% */ -enum md_hs_errno_t { - MDE_HS_RESVD = 1, - MDE_HS_CREATE_FAILURE, - MDE_HS_INUSE, - MDE_HS_UNIT_TOO_LARGE -}; - -struct md_hs_error_t { - md_hs_errno_t errnum; - hs_t hs; -}; - -% -%/* -% * MDDB errors -% */ -enum md_mddb_errno_t { - MDE_TOOMANY_REPLICAS = 1, - MDE_REPLICA_TOOSMALL, - MDE_NOTVERIFIED, - MDE_DB_INVALID, - MDE_DB_EXISTS, - MDE_DB_MASTER, - MDE_DB_TOOSMALL, - MDE_DB_NORECORD, - MDE_DB_NOSPACE, - MDE_DB_NOTNOW, - MDE_DB_NODB, - MDE_DB_NOTOWNER, - MDE_DB_STALE, - MDE_DB_TOOFEW, - MDE_DB_TAGDATA, - MDE_DB_ACCOK, - MDE_DB_NTAGDATA, - MDE_DB_ACCNOTOK, - MDE_DB_NOLOCBLK, - MDE_DB_NOLOCNMS, - MDE_DB_NODIRBLK, - MDE_DB_NOTAGREC, - MDE_DB_NOTAG, - MDE_DB_BLKRANGE -}; - -% -struct md_mddb_error_t { - md_mddb_errno_t errnum; - minor_t mnum; /* associated metadevice */ - set_t setno; - u_int size; -}; - -% -%/* -% * diskset (ds) errors -% */ -enum md_ds_errno_t { - MDE_DS_DUPHOST = 1, - MDE_DS_NOTNODENAME, - MDE_DS_SELFNOTIN, - MDE_DS_NODEHASSET, - MDE_DS_NODENOSET, - MDE_DS_NOOWNER, - MDE_DS_NOTOWNER, - MDE_DS_NODEISNOTOWNER, - MDE_DS_NODEINSET, - MDE_DS_NODENOTINSET, - MDE_DS_SETNUMBUSY, - MDE_DS_SETNUMNOTAVAIL, - MDE_DS_SETNAMEBUSY, - MDE_DS_DRIVENOTCOMMON, - MDE_DS_DRIVEINSET, - MDE_DS_DRIVENOTINSET, - MDE_DS_DRIVEINUSE, - MDE_DS_DUPDRIVE, - MDE_DS_INVALIDSETNAME, - MDE_DS_HASDRIVES, - MDE_DS_SIDENUMNOTAVAIL, - MDE_DS_SETNAMETOOLONG, - MDE_DS_NODENAMETOOLONG, - MDE_DS_OHACANTDELSELF, - MDE_DS_HOSTNOSIDE, - MDE_DS_SETLOCKED, - MDE_DS_ULKSBADKEY, - MDE_DS_LKSBADKEY, - MDE_DS_WRITEWITHSULK, - MDE_DS_SETCLEANUP, - MDE_DS_CANTDELSELF, - MDE_DS_HASMED, - MDE_DS_TOOMANYALIAS, - MDE_DS_ISMED, - MDE_DS_ISNOTMED, - MDE_DS_INVALIDMEDNAME, - MDE_DS_ALIASNOMATCH, - MDE_DS_NOMEDONHOST, - MDE_DS_CANTDELMASTER, - MDE_DS_NOTINMEMBERLIST, - MDE_DS_MNCANTDELSELF, - MDE_DS_RPCVERSMISMATCH, - MDE_DS_WITHDRAWMASTER, - MDE_DS_COMMDCTL_SUSPEND_NYD, - MDE_DS_COMMDCTL_SUSPEND_FAIL, - MDE_DS_COMMDCTL_REINIT_FAIL, - MDE_DS_COMMDCTL_RESUME_FAIL, - MDE_DS_NOTNOW_RECONFIG, - MDE_DS_NOTNOW_CMD, - MDE_DS_COMMD_SEND_FAIL, - MDE_DS_MASTER_ONLY, - MDE_DS_DRIVENOTONHOST, - MDE_DS_CANTRESNARF, - MDE_DS_INSUFQUORUM, - MDE_DS_EXTENDEDNM, - MDE_DS_PARTIALSET, - MDE_DS_SINGLEHOST, - MDE_DS_AUTONOTSET, - MDE_DS_INVALIDDEVID, - MDE_DS_SETNOTIMP, - MDE_DS_NOTSELFIDENTIFY -}; - -% -#if !defined(_KERNEL) -struct md_ds_error_t { - md_ds_errno_t errnum; - set_t setno; - string node<>; - string drive<>; -}; -#else /* _KERNEL */ -struct md_ds_error_t { - md_ds_errno_t errnum; - set_t setno; - u_int xnode; - u_int xdrive; -}; -#endif /* !_KERNEL */ - -% -%/* -% * fundamental error type -% */ -enum md_errclass_t { - MDEC_VOID = 0, /* simple error */ - MDEC_SYS, /* system errno */ - MDEC_RPC, /* RPC errno */ - MDEC_DEV, /* device error */ - MDEC_USE, /* use error */ - MDEC_MD, /* metadevice error */ - MDEC_COMP, /* component error */ - MDEC_HSP, /* hotspare pool error */ - MDEC_HS, /* hotspare error */ - MDEC_MDDB, /* metadevice database error */ - MDEC_DS, /* diskset error */ - MDEC_OVERLAP /* overlap error */ -}; - -% -%/* -% * error info -% */ -union md_error_info_t -switch (md_errclass_t errclass) { -case MDEC_VOID: - md_void_error_t void_error; -case MDEC_SYS: - md_sys_error_t sys_error; -case MDEC_RPC: - md_rpc_error_t rpc_error; -case MDEC_DEV: - md_dev_error_t dev_error; -case MDEC_USE: - md_use_error_t use_error; -case MDEC_MD: - md_md_error_t md_error; -case MDEC_COMP: - md_comp_error_t comp_error; -case MDEC_HSP: - md_hsp_error_t hsp_error; -case MDEC_HS: - md_hs_error_t hs_error; -case MDEC_MDDB: - md_mddb_error_t mddb_error; -case MDEC_DS: - md_ds_error_t ds_error; -case MDEC_OVERLAP: - md_overlap_error_t overlap_error; -}; - -% -#if !defined(_KERNEL) -struct md_error_t { - md_error_info_t info; /* specific info */ - string host<>; /* hostname */ - string extra<>; /* extra context info */ - string name<>; /* file or device name */ -}; -#else /* _KERNEL */ -struct md_error_t { - md_error_info_t info; /* specific info */ - u_int xhost; /* hostname */ - u_int xextra; /* extra context info */ - u_int xname; /* file or device name */ -}; -#endif /* !_KERNEL */ -%#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -%#pragma pack() -%#endif - -#ifdef RPC_HDR -% -%/* -% * Null error constant -% */ -%#define MDNULLERROR {{MDEC_VOID}, NULL, NULL, NULL} -#endif /* RPC_HDR */ - -#ifdef RPC_XDR -%/* -% * Constant null error struct. -% */ -%const md_error_t mdnullerror = MDNULLERROR; -#endif /* RPC_XDR */ - -#ifdef RPC_HDR -% -%/* -% * External reference to constant null error struct. (declared in mdiox_xdr.c) -% */ -%extern const md_error_t mdnullerror; -% -%/* -% * External declarations -% */ -%extern void mdclrerror(md_error_t *ep); /* clear error */ -%extern int mdstealerror(md_error_t *to, md_error_t *from); -% -%#define mdiserror(ep, num) (((ep)->info.errclass == MDEC_VOID) &&\ -% ((ep)->info.md_error_info_t_u.void_error.errnum == (num))) -%#define mdisok(ep) mdiserror(ep, MDE_NONE) -% -%#define mdissyserror(ep, num) (((ep)->info.errclass == MDEC_SYS) && \ -% ((ep)->info.md_error_info_t_u.sys_error.errnum == (num))) -%#define mdisrpcerror(ep, num) (((ep)->info.errclass == MDEC_RPC) && \ -% ((ep)->info.md_error_info_t_u.rpc_error.errnum == (num))) -%#define mdisdeverror(ep, num) (((ep)->info.errclass == MDEC_DEV) && \ -% ((ep)->info.md_error_info_t_u.dev_error.errnum == (num))) -%#define mdisuseerror(ep, num) (((ep)->info.errclass == MDEC_USE) && \ -% ((ep)->info.md_error_info_t_u.use_error.errnum == (num))) -%#define mdismderror(ep, num) (((ep)->info.errclass == MDEC_MD) && \ -% ((ep)->info.md_error_info_t_u.md_error.errnum == (num))) -%#define mdiscomperror(ep, num) (((ep)->info.errclass == MDEC_COMP) &&\ -% ((ep)->info.md_error_info_t_u.comp_error.errnum == (num))) -%#define mdishsperror(ep, num) (((ep)->info.errclass == MDEC_HSP) && \ -% ((ep)->info.md_error_info_t_u.hsp_error.errnum == (num))) -%#define mdishserror(ep, num) (((ep)->info.errclass == MDEC_HS) && \ -% ((ep)->info.md_error_info_t_u.hs_error.errnum == (num))) -%#define mdismddberror(ep, num) (((ep)->info.errclass == MDEC_MDDB) &&\ -% ((ep)->info.md_error_info_t_u.mddb_error.errnum == (num))) -%#define mdisdserror(ep, num) (((ep)->info.errclass == MDEC_DS) && \ -% ((ep)->info.md_error_info_t_u.ds_error.errnum == (num))) -%#define mdisoverlaperror(ep, num) \ -% (((ep)->info.errclass == MDEC_OVERLAP) && \ -% ((ep)->info.md_error_info_t_u.ds_error.errnum == (num))) -% -%#define mdanysyserror(ep) ((ep)->info.errclass == MDEC_SYS) -%#define mdanyrpcerror(ep) ((ep)->info.errclass == MDEC_RPC) -%#define mdanydeverror(ep) ((ep)->info.errclass == MDEC_DEV) -%#define mdanyuseerror(ep) ((ep)->info.errclass == MDEC_USE) -%#define mdanymderror(ep) ((ep)->info.errclass == MDEC_MD) -%#define mdanycomperror(ep) ((ep)->info.errclass == MDEC_COMP) -%#define mdanyhsperror(ep) ((ep)->info.errclass == MDEC_HSP) -%#define mdanyhserror(ep) ((ep)->info.errclass == MDEC_HS) -%#define mdanymddberror(ep) ((ep)->info.errclass == MDEC_MDDB) -%#define mdanydserror(ep) ((ep)->info.errclass == MDEC_DS) -%#define mdanyoverlaperror(ep) ((ep)->info.errclass == MDEC_OVERLAP) -% -#ifdef _KERNEL -% -%extern int mderror(md_error_t *ep, md_void_errno_t errnum); -%extern int mdsyserror(md_error_t *ep, int errnum); -%extern int mddeverror(md_error_t *ep, md_dev_errno_t errnum, -% md_dev64_t dev); -%extern int mdmderror(md_error_t *ep, md_md_errno_t errnum, minor_t mnum); -%extern int mdcomperror(md_error_t *ep, md_comp_errno_t errnum, -% minor_t mnum, md_dev64_t dev); -%extern int mdhsperror(md_error_t *ep, md_hsp_errno_t errnum, hsp_t hsp); -%extern int mdhserror(md_error_t *ep, md_hs_errno_t errnum, -% hsp_t hsp, md_dev64_t dev); -%extern int mdmddberror(md_error_t *ep, md_mddb_errno_t errnum, -% minor_t mnum, set_t setno); -%extern int mddbstatus2error(md_error_t *ep, int status, minor_t mnum, -% set_t setno); -% -#else /* ! _KERNEL */ -% -%extern int mderror(md_error_t *ep, md_void_errno_t errnum, char *name); -%extern int mdsyserror(md_error_t *ep, int errnum, char *name); -%extern int mdrpcerror(md_error_t *ep, CLIENT *clntp, char *host, -% char *extra); -%extern int mdrpccreateerror(md_error_t *ep, char *host, char *extra); -%extern int mddeverror(md_error_t *ep, md_dev_errno_t errnum, -% md_dev64_t dev, char *name); -%extern int mduseerror(md_error_t *ep, md_use_errno_t errnum, -% md_dev64_t dev, char *where, char *name); -%extern int mdmderror(md_error_t *ep, md_md_errno_t errnum, minor_t mnum, -% char *name); -%extern int mdcomperror(md_error_t *ep, md_comp_errno_t errnum, -% minor_t mnum, md_dev64_t dev, char *name); -%extern int mdhsperror(md_error_t *ep, md_hsp_errno_t errnum, hsp_t hsp, -% char *name); -%extern int mdhserror(md_error_t *ep, md_hs_errno_t errnum, -% hsp_t hsp, md_dev64_t dev, char *name); -%extern int mdmddberror(md_error_t *ep, md_mddb_errno_t errnum, -% minor_t mnum, set_t setno, size_t size, char *name); -%extern int mddserror(md_error_t *ep, md_ds_errno_t errnum, set_t setno, -% char *node, char *drive, char *name); -%extern int mdoverlaperror(md_error_t *ep, md_overlap_errno_t errnum, -% char *overlap, char *where, char *name); -% -%extern void mderrorextra(md_error_t *ep, char *extra); -% -#endif /* ! _KERNEL */ -#endif /* RPC_HDR */ - -/* - * common unit structure - */ -struct mdc_unit { - u_longlong_t un_revision; /* revision # (keep this a longlong) */ - md_types_t un_type; /* type of record */ - md_status_t un_status; /* status flags */ - int un_parent_res; /* parent reserve index */ - int un_child_res; /* child reserve index */ - minor_t un_self_id; /* metadevice unit number */ - mddb_recid_t un_record_id; /* db record id */ - uint_t un_size; /* db record size for unit structure */ - ushort_t un_flag; /* configuration info */ - diskaddr_t un_total_blocks; /* external # blocks in metadevice */ - diskaddr_t un_actual_tb; /* actual # blocks in metadevice */ - uint_t un_nhead; /* saved value of # heads */ - uint_t un_nsect; /* saved value of # sectors */ - ushort_t un_rpm; /* saved value of rpm's */ - ushort_t un_wr_reinstruct; /* worse case write reinstruct */ - ushort_t un_rd_reinstruct; /* worse case read reinstruct */ - mddb_recid_t un_vtoc_id; /* vtoc db record id */ - md_stackcap_t un_capabilities; /* subdevice capabilities */ - md_parent_t un_parent; /* -1 none, -2 many, positive unit # */ - uint_t un_user_flags; /* provided for userland */ -}; -typedef struct mdc_unit mdc_unit_t; - -/* - * For old 32 bit format use only - */ -%#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -%#pragma pack(4) -%#endif -struct mdc_unit32_od { - u_longlong_t un_revision; - md_types_t un_type; - md_status_t un_status; - int un_parent_res; - int un_child_res; - minor_t un_self_id; - mddb_recid_t un_record_id; - uint_t un_size; - ushort_t un_flag; - daddr32_t un_total_blocks; /* external # blocks in metadevice */ - daddr32_t un_actual_tb; /* actual # blocks in metadevice */ - ushort_t un_nhead; - ushort_t un_nsect; - ushort_t un_rpm; - ushort_t un_wr_reinstruct; - ushort_t un_rd_reinstruct; - mddb_recid_t un_vtoc_id; - md_stackcap_t un_capabilities; - md_parent_t un_parent; - uint_t un_user_flags; -}; -typedef struct mdc_unit32_od mdc_unit32_od_t; - -struct md_unit { - mdc_unit_t c; /* common stuff */ -}; -typedef struct md_unit md_unit_t; - -enum sp_status_t { - MD_SP_CREATEPEND, /* soft partition creation in progress */ - MD_SP_GROWPEND, /* attach operation in progress */ - MD_SP_DELPEND, /* delete operation in progress */ - MD_SP_OK, /* soft partition is stable */ - MD_SP_ERR, /* soft partition is errored */ - MD_SP_RECOVER, /* recovery operation in progess */ - MD_SP_LAST /* always the last entry */ -}; - -/* soft partition offsets and lengths are specified in sectors */ -typedef u_longlong_t sp_ext_offset_t; -typedef u_longlong_t sp_ext_length_t; -struct mp_ext { - sp_ext_offset_t un_voff; /* virtual offset */ - sp_ext_offset_t un_poff; /* physical offset */ - sp_ext_length_t un_len; /* length of extent */ -}; -typedef struct mp_ext mp_ext_t; - -/* - * mp_unit32_od is for old 32 bit format only - */ -struct mp_unit32_od { - mdc_unit32_od_t c; /* common unit structure */ - mdkey_t un_key; /* namespace key */ - dev32_t un_dev; /* device number */ - sp_ext_offset_t un_start_blk; /* start block, incl reserved space */ - sp_status_t un_status; /* sp status */ - uint_t un_numexts; /* number of extents */ - sp_ext_length_t un_length; /* total length (in sectors) */ - /* extent array. NOTE: sized dynamically! */ - mp_ext_t un_ext[1]; -}; -typedef struct mp_unit32_od mp_unit32_od_t; - -/* - * softpart unit structure - */ -struct mp_unit { - mdc_unit_t c; /* common unit structure */ - mdkey_t un_key; /* namespace key */ - md_dev64_t un_dev; /* device number, 64 bit */ - sp_ext_offset_t un_start_blk; /* start block, incl reserved space */ - sp_status_t un_status; /* sp status */ - uint_t un_numexts; /* number of extents */ - sp_ext_length_t un_length; /* total length (in sectors) */ - /* extent array. NOTE: sized dynamically! */ - mp_ext_t un_ext[1]; -}; -typedef struct mp_unit mp_unit_t; - -/* - * ioctl structures used when passing ioctls via rpc.mdcommd - */ -struct md_driver { - char md_drivername[MD_MAXDRVNM]; - set_t md_setno; -}; -typedef struct md_driver md_driver_t; - -%#define MD_DRIVER md_driver_t md_driver; -#define MD_DRIVER md_driver_t md_driver; - -struct md_set_params { - MD_DRIVER - md_error_t mde; - minor_t mnum; - md_types_t type; - uint_t size; - int options; - uint64_t mdp; /* Contains pointer */ -}; -typedef struct md_set_params md_set_params_t; -%#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -%#pragma pack() -%#endif - - diff --git a/usr/src/uts/common/sys/lvm/mdmn_commd.x b/usr/src/uts/common/sys/lvm/mdmn_commd.x deleted file mode 100644 index 3ec7b1fff4dd..000000000000 --- a/usr/src/uts/common/sys/lvm/mdmn_commd.x +++ /dev/null @@ -1,623 +0,0 @@ -%/* -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License (the "License"). -% * You may not use this file except in compliance with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%/* -% * Copyright 2008 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% */ -% - -%#include -%#include -%#include -%#include -%#ifndef _KERNEL -%#include -%#include -%extern bool_t xdr_md_set_params_t(XDR *xdrs, md_set_params_t *objp); -%extern bool_t xdr_mp_unit_t(XDR *xdrs, mp_unit_t *objp); -%extern bool_t xdr_diskaddr_t(XDR *xdrs, diskaddr_t *objp); -%extern bool_t xdr_md_dev64_t(XDR *xdrs, md_dev64_t *objp); -%extern bool_t xdr_daddr_t(XDR *xdrs, daddr_t *objp); -%extern bool_t xdr_daddr32_t(XDR *xdrs, daddr32_t *objp); -%#else -%#include -%#endif /* ! _KERNEL */ - -/* every message handler must have these parameters */ -%#define HANDLER_PARMS md_mn_msg_t *msg, uint_t flag, md_mn_result_t *res - - -/* every submessage generator must have these parameters */ -%#define SMGEN_PARMS md_mn_msg_t *msg, md_mn_msg_t **msglist - -/* when ever a new message type is added, an entry for it must be made in the master msg_table (defined in mdmn_commd_server.c*/ - -enum md_mn_msgtype_t { - MD_MN_MSG_NULL = 0, /* special message type for internal use only */ - MD_MN_MSG_TEST1, - MD_MN_MSG_TEST2, - MD_MN_MSG_TEST3, - MD_MN_MSG_TEST4, - MD_MN_MSG_TEST5, - MD_MN_MSG_TEST6, - MD_MN_MSG_BC_CMD, /* Send metacommand */ - MD_MN_MSG_BC_CMD_RETRY, /* Send metacommand, retry on busy */ - MD_MN_MSG_CLU_CHECK, - MD_MN_MSG_CLU_LOCK, - MD_MN_MSG_CLU_UNLOCK, - MD_MN_MSG_REQUIRE_OWNER, /* Request to become Mirror owner */ - MD_MN_MSG_CHOOSE_OWNER, /* Request to allocate a resync owner */ - MD_MN_MSG_CHANGE_OWNER, /* Change owner to a specific node */ - MD_MN_MSG_SUSPEND_WRITES, /* Suspend writes to a mirror */ - MD_MN_MSG_STATE_UPDATE_RESWR, /* Ch state of comp & resume writes */ - MD_MN_MSG_STATE_UPDATE, /* Susp writes/Change state of comp */ - MD_MN_MSG_ALLOCATE_HOTSPARE, /* Allocate hotspare for mirror comp */ - MD_MN_MSG_RESYNC_STARTING, /* Resync thread starting */ - MD_MN_MSG_RESYNC_NEXT, /* Next resync region */ - MD_MN_MSG_RESYNC_FINISH, /* Resync thread finished */ - MD_MN_MSG_RESYNC_PHASE_DONE, /* End of resync phase */ - MD_MN_MSG_SET_CAP, /* Set capability, eg ABR */ - MD_MN_MSG_VERBOSITY, /* set various levels of debug */ - MD_MN_MSG_MDDB_PARSE, /* Slave to reparse portion of mddb */ - MD_MN_MSG_MDDB_BLOCK, /* Block parse/recs on master/slave */ - MD_MN_MSG_META_DB_ATTACH, /* Master message to add new mddb */ - MD_MN_MSG_SM_MDDB_ATTACH, /* Submessage to add new mddb */ - MD_MN_MSG_META_DB_DETACH, /* Master message to delete mddb */ - MD_MN_MSG_SM_MDDB_DETACH, /* Submessage to delete mddb */ - MD_MN_MSG_META_DB_NEWSIDE, /* Node adding mddb side info */ - MD_MN_MSG_META_DB_DELSIDE, /* Node deleting mddb side info */ - MD_MN_MSG_META_MD_ADDSIDE, /* Node adding md side info */ - MD_MN_MSG_META_MD_DELSIDE, /* Node deleting md side info */ - MD_MN_MSG_MDDB_OPTRECERR, /* Node detects opt rec error */ - MD_MN_MSG_ABORT, /* Stop sending messages to any node */ - MD_MN_MSG_STATE_UPDATE_RESWR2, /* UPDATE_RESWR for watermark updates */ - MD_MN_MSG_STATE_UPDATE2, /* STATE_UPDATE for watermark updates */ - MD_MN_MSG_ALLOCATE_HOTSPARE2, /* ALLOCATE_HOTSPARE for wm updates */ - MD_MN_MSG_IOCSET, /* Send IOCSET ioctl */ - MD_MN_MSG_SP_SETSTAT, /* Update status of softpart */ - MD_MN_MSG_ADDKEYNAME, /* Add key */ - MD_MN_MSG_DELKEYNAME, /* Delete key */ - MD_MN_MSG_GET_TSTATE, /* Get tstate from master */ - MD_MN_MSG_GET_MIRROR_STATE, /* Get submirror state from master */ - MD_MN_MSG_SP_SETSTAT2, /* Update softpart status on error */ - MD_MN_MSG_SETSYNC, /* Set resync status */ - MD_MN_MSG_POKE_HOTSPARES, /* Call poke_hotspares */ - MD_MN_MSG_ADDMDNAME, /* Add metadevice name */ - MD_MN_MSG_RR_DIRTY, /* Mark RR range as dirty */ - MD_MN_MSG_RR_CLEAN, /* Mark RR range as clean */ - MD_MN_NMESSAGES /* insert elements before */ -}; - -/* - * A message of class X may trigger only messages of classes higher than X - * Feel free to change the order here. As long as you leave MD_MSG_CL_NULL - * and NCLASSES, of course - */ - -enum md_mn_msgclass_t { - MD_MSG_CLASS0 = 0, /* special message class for internal use only */ - MD_MSG_CLASS1, - MD_MSG_CLASS2, - MD_MSG_CLASS3, - MD_MSG_CLASS4, - MD_MSG_CLASS5, - MD_MSG_CLASS6, - MD_MSG_CLASS7, - MD_MN_NCLASSES /* insert elements before */ -}; - -%/* -% * The following are needed for things like suspend and resume when the -% * operation is to be applied to all classes / all sets. -% */ -%#define MD_COMM_ALL_CLASSES MD_MSG_CLASS0 -%#define MD_COMM_ALL_SETS 0 - -/* This is for state changes of submirror components */ -struct md_mn_msg_stch_t { - minor_t msg_stch_mnum; /* minor number of dev */ - int msg_stch_sm; /* submirror involved */ - int msg_stch_comp; /* component */ - int msg_stch_new_state; /* new state for comp */ - mddb_recid_t msg_stch_hs_id; /* hs_id at time of call */ -}; - - -/* This is for suspending writes to a mirror */ -struct md_mn_msg_suspwr_t { - minor_t msg_suspwr_mnum; /* minor number of dev */ -}; - -/* Message format for choosing a resync owner */ -struct md_mn_msg_chooseid_t { - minor_t msg_chooseid_mnum; /* minor num of dev */ - int msg_chooseid_rcnt; /* resync count for set */ - int msg_chooseid_set_node; /* 1 => use rcnt as nodeid for owner */ -}; - -/* Message format for changing a resync owner */ -struct md_mn_msg_chowner_t { - minor_t msg_chowner_mnum; /* minor num of dev */ - int msg_chowner_nodeid; /* node id of new owner */ -}; - -/* Message format for setting metadevice capability */ -struct md_mn_msg_setcap_t { - char msg_setcap_driver[MD_DRIVERNAMELEN]; /* Driver name */ - minor_t msg_setcap_mnum; /* minor num of dev */ - u_int msg_setcap_set; /* new settings */ -}; - -/* This is for setting the verbosity level (MD_MN_MSG_VERBOSITY) */ -struct md_mn_verbose_t { - set_t mmv_setno; - md_mn_msgclass_t mmv_class; - u_int mmv_what; -}; - -/* What do we want to debug ? (mmv_what) */ -%/* turn off everything */ -%#define MD_MMV_NULL 0x00000000 -%/* initialization of nodes / rpc clients */ -%#define MD_MMV_INIT 0x00000001 -%/* mdmn_send_svc_1 related / early stage */ -%#define MD_MMV_SEND 0x00000002 -%/* mdmn_work_svc_1 stuff on master */ -%#define MD_MMV_WORK 0x00000004 -%/* mdmn_master_process_msg stuff */ -%#define MD_MMV_PROC_M 0x00000008 -%/* mdmn_slave_process_msg stuff */ -%#define MD_MMV_PROC_S 0x00000010 -%/* wakeup_master */ -%#define MD_MMV_WAKE_M 0x00000020 -%/* wakeup_initiator */ -%#define MD_MMV_WAKE_I 0x00000040 -%/* Misc stuff*/ -%#define MD_MMV_MISC 0x00000080 -%/* turn on everything */ -%#define MD_MMV_ALL 0x0000ffff -%/* write to syslog instead of output file, for critical messages */ -%#define MD_MMV_SYSLOG 0x10000000 -%/* enable timestamps */ -%#define MD_MMV_TIMESTAMP 0x20000000 - - -/* Message format for allocating hotspares */ -struct md_mn_msg_allochsp_t { - minor_t msg_allochsp_mnum; /* minor num of dev */ - int msg_allochsp_sm; /* submirror index */ - int msg_allochsp_comp; /* component index */ - mddb_recid_t msg_allochsp_hs_id; /* hotspare id */ -}; - -/* Message format for resync messages */ -struct md_mn_msg_resync_t { - minor_t msg_resync_mnum; /* minor num of dev */ - int msg_resync_type; /* resync type */ - diskaddr_t msg_resync_start; /* start of resync region */ - diskaddr_t msg_resync_rsize; /* size of resync region */ - diskaddr_t msg_resync_done; /* count of resync done */ - diskaddr_t msg_resync_2_do; /* total size of resync */ - int msg_originator; /* message originator */ - int msg_resync_flags; /* resync flags */ - sm_state_t msg_sm_state[NMIRROR]; /* submirror state */ - sm_flags_t msg_sm_flags[NMIRROR]; /* submirror flags */ -}; - -%#define MD_MSGF_DEFAULT_FLAGS 0x00000000 - -/* Message format for blocking/unblocking MDDB parsing and record changes */ -struct md_mn_msg_mddb_block_t { - int msg_block_flags; -}; - -/* Message format for MDDB re-parsing */ -struct md_mn_msg_mddb_parse_t { - int msg_parse_flags; /* flags describe part to reparse */ - int msg_lb_flags[50]; -}; - -/* Message format for MDDB attach */ -struct md_mn_msg_meta_db_attach_t { - md_dev64_t msg_l_dev; - int msg_cnt; - int msg_dbsize; - char msg_dname[16]; - md_splitname msg_splitname; - u_int msg_options; - char msg_devid[1]; /* unused for now, for future */ - /* must be last element */ -}; - -/* Message format for MDDB detach */ -struct md_mn_msg_meta_db_detach_t { - md_splitname msg_splitname; - char msg_devid[1]; /* unused for now, for future */ - /* must be last element */ -}; - -/* Message format for MDDB newside */ -struct md_mn_msg_meta_db_newside_t { - md_dev64_t msg_l_dev; - daddr_t msg_blkno; - side_t msg_sideno; - minor_t msg_mnum; - char msg_dname[16]; - md_splitname msg_splitname; - char msg_devid[1]; /* unused for now, for future */ - /* must be last element */ -}; - -/* Message format for MDDB delside */ -struct md_mn_msg_meta_db_delside_t { - md_dev64_t msg_l_dev; - daddr_t msg_blkno; - side_t msg_sideno; - char msg_devid[1]; /* unused for now, for future */ - /* must be last element */ -}; - -/* Message format for MD addside */ -struct md_mn_msg_meta_md_addside_t { - side_t msg_sideno; - side_t msg_otherside; -}; - -/* Message format for MDDB delside */ -struct md_mn_msg_meta_md_delside_t { - side_t msg_sideno; -}; - -/* Message format for optimized record error */ -struct md_mn_msg_mddb_optrecerr_t { - md_replica_recerr_t msg_recerr[2]; -}; - -/* - * Message format for IOCSET message - */ - -struct md_mn_msg_iocset_t { - md_set_params_t iocset_params; - mp_unit_t unit; -}; - -/* Message format for SP_SETSTAT message */ - -struct md_mn_msg_sp_setstat_t { - minor_t sp_setstat_mnum; - int sp_setstat_status; -}; - -/* Message format for ADDKEYNAME message */ - -struct md_mn_msg_addkeyname_t { - set_t addkeyname_setno; - char addkeyname_name[1]; /* must be last element */ -}; - -/* - * Add metadevice name into replica - */ -struct md_mn_msg_addmdname_t { - set_t addmdname_setno; - char addmdname_name[1]; -}; - -/* Message format for DELKEYNAME message */ - -struct md_mn_msg_delkeyname_t { - md_dev64_t delkeyname_dev; - set_t delkeyname_setno; - mdkey_t delkeyname_key; -}; - -/* Message format for GET_TSTATE message */ - -struct md_mn_msg_gettstate_t { - md_dev64_t gettstate_dev; -}; - -/* Message format for GET_MIRROR_STATE message */ - -struct md_mn_msg_mir_state_t { - minor_t mir_state_mnum; -}; - -/* Results format for GET_SM_STATE message */ -struct md_mn_msg_mir_state_res_t { - sm_state_t sm_state[NMIRROR]; - sm_flags_t sm_flags[NMIRROR]; - u_int mir_tstate; -}; - -/* Message format for MD_MN_MSG_SETSYNC message */ -struct md_mn_msg_setsync_t { - minor_t setsync_mnum; - md_riflags_t setsync_flags; - diskaddr_t setsync_copysize; -}; - -/* Message format for MD_MN_MSG_POKE_HOTSPARES message */ -struct md_mn_msg_pokehsp_t { - minor_t pokehsp_setno; -}; - -/* Message format for MD_MN_MSG_RR_DIRTY message */ -struct md_mn_msg_rr_dirty_t { - minor_t rr_mnum; - int rr_nodeid; - u_int rr_range; /* Start(16bits) | End(16bits) */ -}; - -/* Message format for MD_MN_MSG_RR_CLEAN message */ -%#define MDMN_MSG_RR_CLEAN_DATA_MAX_BYTES \ -% ((MDMN_MAX_KMSG_DATA) - \ -% sizeof (struct md_mn_msg_rr_clean_t)) -%#define MDMN_MSG_RR_CLEAN_SIZE_DATA(x) \ -% (sizeof (struct md_mn_msg_rr_clean_t) + (x)) -%#define MDMN_MSG_RR_CLEAN_MSG_SIZE(x) \ -% (sizeof (struct md_mn_msg_rr_clean_t) \ -% + MDMN_MSG_RR_CLEAN_DATA_BYTES(x)) -%#define MDMN_MSG_RR_CLEAN_DATA(x) \ -% ((unsigned char *)(x) + \ -% sizeof (struct md_mn_msg_rr_clean_t)) - -/* since we cannot use ushorts, some macros to extract the parts from an int */ -%#define MDMN_MSG_RR_CLEAN_START_BIT(x) ((x)->rr_start_size >> 16) -%#define MDMN_MSG_RR_CLEAN_DATA_BYTES(x) ((x)->rr_start_size & 0xffff) -%#define MDMN_MSG_RR_CLEAN_START_SIZE_SET(x, start, size) \ -% ((x)->rr_start_size = (start << 16) | size) - -struct md_mn_msg_rr_clean_t { - md_mn_nodeid_t rr_nodeid; - unsigned int rr_mnum; - unsigned int rr_start_size; /* start_bit (16b) | data_bytes (16b) */ - /* actual data goes here */ -}; - -%#define MD_MSGF_NO_LOG 0x00000001 -%#define MD_MSGF_NO_BCAST 0x00000002 -%#define MD_MSGF_STOP_ON_ERROR 0x00000004 -%#define MD_MSGF_REPLAY_MSG 0x00000008 -%#define MD_MSGF_OVERRIDE_SUSPEND 0x00000010 -%#define MD_MSGF_ON_MASTER 0x00000020 -%#define MD_MSGF_ON_SLAVE 0x00000040 -%#define MD_MSGF_ON_INITIATOR 0x00000080 -%#define MD_MSGF_LOCAL_ONLY 0x00000100 -%#define MD_MSGF_FAIL_ON_SUSPEND 0x00000200 -%#define MD_MSGF_NO_MCT 0x00000400 -%#define MD_MSGF_PANIC_WHEN_INCONSISTENT 0x00000800 -%#define MD_MSGF_BLK_SIGNAL 0x00001000 -%#define MD_MSGF_KSEND_NORETRY 0x00002000 -%#define MD_MSGF_DIRECTED 0x00004000 -%#define MD_MSGF_VERBOSE 0x10000000 -%#define MD_MSGF_VERBOSE_2 0x20000000 - -%#define MD_MSGF_INHERIT_BITS \ -% MD_MSGF_REPLAY_MSG | MD_MSGF_OVERRIDE_SUSPEND - - - -%/* maximum number of nodes in cluster (not in diskset) */ -%#define NNODES MD_MNMAXSIDES - - -/* if you add elements here, make sure, to add them to MSGID_COPY(), too */ -struct md_mn_msgid_t { - uint64_t mid_time; /* unique timestamp */ - md_mn_nodeid_t mid_nid; /* node that created the message */ - md_mn_msgclass_t mid_oclass; /* for submessages original class */ - uint8_t mid_smid; /* sub message number */ - uint8_t mid_spare[15]; /* Always good to have some spares */ -}; - -%#define MD_NULL_MSGID (md_mn_msgid_t *)NULL -% -%/* macros to handle msgid's */ -%#define MSGID_COPY(from, to) { \ -% (to)->mid_nid = (from)->mid_nid; \ -% (to)->mid_smid = (from)->mid_smid; \ -% (to)->mid_oclass = (from)->mid_oclass; \ -% (to)->mid_time = (from)->mid_time; \ -% } -% -%#define MSGID_CMP(a, b) \ -% (((a)->mid_nid == (b)->mid_nid) && \ -% ((a)->mid_smid == (b)->mid_smid) && \ -% ((a)->mid_time == (b)->mid_time)) -% -%#define MSGID_ELEMS(mid) (mid).mid_nid, (mid).mid_time, (mid).mid_smid - -/* if you add elements here, make sure, to add them to copy_msg(), too */ -struct md_mn_msg_t { - md_mn_msgid_t msg_msgid; /* Message id */ - md_mn_nodeid_t msg_sender; /* who wants the results? */ - u_int msg_flags; /* See MD_MSGF_* above */ - set_t msg_setno; /* which set is involved */ - md_mn_msgtype_t msg_type; /* what type of message */ - md_mn_nodeid_t msg_recipient; /* who to send DIRECTED message to */ - char msg_spare[28]; /* Always good to hav'em */ - opaque msg_event<>; /* the actual event wrapped up */ -}; -%#define msg_event_data msg_event.msg_event_val -%#define msg_event_size msg_event.msg_event_len -% -%#define MD_MN_MSG_LEN(msg) ((msg)->msg_event_size +\ -% sizeof (md_mn_msg_t)) -%#define MD_MN_MSG_MAXDATALEN 1024 - -/* ondisk version of the message */ -struct md_mn_msg_od_t { - md_mn_msgid_t msg_msgid; /* Message id */ - md_mn_nodeid_t msg_sender; /* who wants the results? */ - uint32_t msg_flags; /* See MD_MSGF_* above */ - set_t msg_setno; /* which set is involved */ - md_mn_msgtype_t msg_type; /* what type of message */ - md_mn_nodeid_t msg_recipient; /* who to send DIRECTED message to */ - char msg_spare[28]; /* Always good to hav'em */ - uint32_t msg_ev_len; - char msg_ev_val[MD_MN_MSG_MAXDATALEN]; -}; -% -%#define msg_od_event_data msg_ev_val -%#define msg_od_event_size msg_ev_len -%#define MDMN_MAX_KMSG_DATA 256 - -/* needed for mdmn_ksend_message to deliver the data into userland thru doors */ -struct md_mn_kmsg_t { - md_mn_msgid_t kmsg_msgid; - u_int kmsg_flags; - set_t kmsg_setno; - md_mn_msgtype_t kmsg_type; - md_mn_nodeid_t kmsg_recipient; /* who to send DIRECTED message to */ - int kmsg_size; - char kmsg_data[MDMN_MAX_KMSG_DATA]; -}; - -/* if you add elements here, make sure, to add them to copy_result(), too */ -struct md_mn_result_t { - md_mn_msgid_t mmr_msgid; - md_mn_msgtype_t mmr_msgtype; - set_t mmr_setno; - u_int mmr_flags; - md_mn_nodeid_t mmr_sender; /* needed to check for unsolicited msgs */ - md_mn_nodeid_t mmr_failing_node; /* trouble maker */ - int mmr_comm_state; - int mmr_exitval; - md_error_t mmr_ep; - opaque mmr_output<>; /* msg handler can store output here */ - opaque mmr_error<>; /* ... and error output goes here */ -}; - -%#define MDMN_MAX_KRES_DATA 256 -/* kernel results don't provide something like stderr */ -struct md_mn_kresult_t { - md_mn_msgtype_t kmmr_msgtype; - u_int kmmr_flags; - int kmmr_comm_state; - md_mn_nodeid_t kmmr_failing_node; /* trouble maker */ - int kmmr_exitval; - int kmmr_res_size; - char kmmr_res_data[MDMN_MAX_KRES_DATA]; -}; - -/* possible return values for the rpc services */ -enum md_mn_retval_t { - MDMNE_NULL = 0, - MDMNE_ACK, /* this is the good one */ - MDMNE_CLASS_BUSY, /* try again */ - MDMNE_RPC_FAIL, /* some RPC error occurred */ - MDMNE_THR_CREATE_FAIL, /* cannot create working thread */ - MDMNE_NO_HANDLER, /* this message has no handler */ - MDMNE_LOG_FAIL, /* logging failed for some reason */ - MDMNE_CANNOT_CONNECT, /* rpc connection not possible */ - MDMNE_NO_WAKEUP_ENTRY, /* no entry in wakeup table for msgid */ - MDMNE_NOT_JOINED, /* this host hasn't joined yet */ - MDMNE_HANDLER_FAILED, /* could not run the handler for this message */ - MDMNE_EINVAL, /* bad argument specified for special message */ - MDMNE_SUSPENDED, /* commd doesn't accept new messgaes */ - MDMNE_CLASS_LOCKED, /* class has been locked (for testing only) */ - MDMNE_TIMEOUT, /* processing message took too long */ - MDMNE_SET_NOT_DRAINED, /* still outstandang messages for this set */ - MDMNE_ABORT, /* Contacted node is in abort state */ - MDMNE_IGNORE_NODE /* ignore current node, send msg to next one */ -}; - -% -%#define MDMN_KSEND_MSG_OK(rv, kres) \ -% (((rv) == 0) && (((kres)->kmmr_exitval == 0) && \ -% (((kres)->kmmr_comm_state == MDMNE_ACK) || \ -% (!md_mn_is_commd_present() && \ -% ((kres)->kmmr_comm_state == MDMNE_RPC_FAIL))))) -% - -% -%#define mmr_out mmr_output.mmr_output_val -%#define mmr_out_size mmr_output.mmr_output_len -%#define mmr_err mmr_error.mmr_error_val -%#define mmr_err_size mmr_error.mmr_error_len -% -% -%extern void mdmn_master_process_msg(md_mn_msg_t *); -%extern void mdmn_slave_process_msg(md_mn_msg_t *); - - -struct md_mn_set_and_class_t { - set_t msc_set; - md_mn_msgclass_t msc_class; - u_int msc_flags; -}; - -%/* possible values for msc_flags above */ -%#define MD_MSCF_NO_FLAGS 0x0000 -%#define MD_MSCF_DONT_RESUME_CLASS1 0x0001 - -struct md_mn_type_and_lock_t { - md_mn_msgtype_t mmtl_type; - u_int mmtl_lock; -}; - -%/* possible values for mmtl_flags above */ -%#define MMTL_UNLOCK 0x0000 -%#define MMTL_LOCK 0x0001 - -%/* Currently not used, but thinkable extensions */ -%#define MMTL_LOCK_ON_INITIATOR 0x0002 -%#define MMTL_LOCK_ON_MASTER 0x0004 -%#define MMTL_LOCK_ON_SLAVE 0x0008 -%#define MMTL_LOCK_ONE_TIME_ONLY 0x0010 - - -program MDMN_COMMD { - version TWO { - md_mn_result_t - mdmn_send(md_mn_msg_t) = 1; - - int - mdmn_work(md_mn_msg_t msg) = 2; - - int - mdmn_wakeup_initiator(md_mn_result_t) = 3; - - int - mdmn_wakeup_master(md_mn_result_t) = 4; - - int - mdmn_comm_lock(md_mn_set_and_class_t) = 5; - - int - mdmn_comm_unlock(md_mn_set_and_class_t) = 6; - - int - mdmn_comm_suspend(md_mn_set_and_class_t) = 7; - - int - mdmn_comm_resume(md_mn_set_and_class_t) = 8; - - int - mdmn_comm_reinit_set(set_t) = 9; - - int - mdmn_comm_msglock(md_mn_type_and_lock_t) = 10; - } = 2; -} = 100422; diff --git a/usr/src/uts/common/sys/lvm/mdvar.h b/usr/src/uts/common/sys/lvm/mdvar.h deleted file mode 100644 index 3eff20c242f7..000000000000 --- a/usr/src/uts/common/sys/lvm/mdvar.h +++ /dev/null @@ -1,862 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _SYS_MDVAR_H -#define _SYS_MDVAR_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef DEBUG -#include -#endif -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * defaults - */ -#define NMD_DEFAULT 128 /* number of metadevices */ -#define MD_NOPS 25 /* number of misc modules */ -#define MAXBOOTLIST 64 - -/* - * Needed for backwards-compatibility with metadevices created under - * 2.6 or earlier. Back then, a krwlock_t was twelve bytes. More - * recently, it's four bytes. Since these get included in structures - * written out to disk, we have to make sure we're using the largest - * size. Things will get interesting if krwlock_t ever gets bigger - * than twelve bytes. - */ - -typedef union _md_krwlock { - krwlock_t lock; - struct { - void *_opaque[3]; - } xx; -} md_krwlock_t; - -typedef struct { - kmutex_t md_io_mx; /* counter mutex */ - kcondvar_t md_io_cv; /* ioctl wait on if draining */ - long io_cnt; /* number of I/Os */ - long io_state; /* !0 if waiting on zero */ -} md_set_io_t; - -typedef enum set_iostate { - MD_SET_ACTIVE = 1, - MD_SET_RELEASE = 2 -}set_iostate_t; - -/* - * for md_dev64_t translation - */ -struct md_xlate_table { - dev32_t mini_devt; - dev32_t targ_devt; -}; - -extern struct md_xlate_table *md_tuple_table; - -/* - * for major number translation - */ - -struct md_xlate_major_table { - char *drv_name; - major_t targ_maj; -}; - -extern struct md_xlate_major_table *md_major_tuple_table; - -extern int md_tuple_length; -extern uint_t md_majortab_len; -extern int md_in_upgrade; - -extern md_mn_nodeid_t md_mn_mynode_id; - -#define MD_UPGRADE (md_in_upgrade == 1) - -/* - * Flags used during upgrade: - * - * md_keep_repl_state flag means that mddb should be kept in the format - * that was found on disk (non-device id format vs. device id format). - * This is used during the upgrade process when install is probing - * for root disks so that the user can choose the one to be upgraded. - * - * md_devid_destroy flag is used to destroy device ids stored in the - * metadevice state database (mddb). - * - * The md_devid_destroy flag is to be used only in a catastrophic failure - * case. An example of this would be if a user upgrades firmware on all - * disks where this causes the disks to now have different device id's. - * The user would not be able to boot a mirror'd root filesystem since the - * system would recognize none of the device id's stored in the mddb. - * This flag would destroy all device id information stored in the mddb and - * if the md_keep_repl_state flag was not set, the mddb would be reconverted - * to device id format on SLVM startup and all of the device id - * information would be regenerated. - * - * If the md_devid_destroy flag is set and the md_keep_repl_state flag is - * set, the mddb's would have their device id information destroyed and - * would be left in non-devid format since the device id information would - * not be regenerated. - * - * This flag is not documented anywhere and is only to be used as a last - * resort as in the described case or if a device driver has a bug where - * device id's are found to not be unique. If device id's aren't unique, - * the user could run without device id's until a patch is released for - * that driver. - */ -extern int md_keep_repl_state; -extern int md_devid_destroy; -extern int mdmn_door_did; -#ifdef _KERNEL -extern door_handle_t mdmn_door_handle; -#endif /* _KERNEL */ - -/* - * An io_lock mechanism for raid, the MD_UL_XXXX bits are used for - * convenience. - */ -typedef struct md_io_lock { - ulong_t io_readercnt; /* number of unit readers */ - ulong_t io_wanabecnt; /* # pending on becoming unit writer */ - ulong_t io_lock; - void *io_list_front; - void *io_list_back; - kmutex_t io_mx; - kcondvar_t io_cv; - kmutex_t io_list_mutex; /* list of waiting io */ - kthread_id_t io_owner; /* writer thread */ -} md_io_lock_t; - -/* - * The following flags are in un_flag field of mdc_unit struct. - */ -#define MD_LABELED 0x1 /* First sector of the metadevice is a label */ -#define MD_EFILABEL 0x2 /* This md has an EFI label and no vtoc */ - -/* - * This is the number of bytes a DKIOCGETEFI ioctl returns - * For now it's one time the header and once the size for a partition info - */ -#define MD_EFI_LABEL_SIZE (sizeof (efi_gpt_t) + sizeof (efi_gpe_t)) - -/* This is the number of bytes consumed by efi_gpe_PartitionName */ -#define MD_EFI_PARTNAME_BYTES (EFI_PART_NAME_LEN * sizeof (ushort_t)) - -typedef enum hs_cmds { - HS_GET, HS_FREE, HS_BAD, HSP_INCREF, HSP_DECREF, HS_MKDEV -} hs_cmds_t; - -typedef struct md_link { - struct md_link *ln_next; - set_t ln_setno; - uint_t ln_id; -} md_link_t; - -typedef struct mdi_unit { - md_link_t ui_link; - ulong_t ui_readercnt; /* number of unit readers */ - ulong_t ui_wanabecnt; /* # pending on becoming unit writer */ - ulong_t ui_lock; - kmutex_t ui_mx; - kcondvar_t ui_cv; - int ui_opsindex; - uint_t ui_ocnt[OTYPCNT]; /* open counts */ - md_io_lock_t *ui_io_lock; /* pointer to io lock */ - kstat_t *ui_kstat; /* kernel statistics */ - kthread_id_t ui_owner; /* writer thread */ - uint_t ui_tstate; /* transient state bits */ - uint_t ui_capab; /* Capability bits supported */ -} mdi_unit_t; - -/* - * Following are used with ui_lock - * which is in the unit incore structure. - */ -#define MD_UL_WRITER 0x0001 /* Stall all new strategy calls */ -#define MD_UL_WANABEWRITER 0x0002 -#define MD_UL_OPENORCLOSE 0x0004 - -#define MD_UL_OPEN 0x0008 /* unit is open */ -#define MD_UL_EXCL 0x0010 /* unit is open exclusively */ - -/* - * The softpart open code may do an I/O to validate the watermarks - * and should hold no open locks during this I/O. So, mark the unit - * as OPENINPROGRESS and drop the locks. This will keep any other - * softpart open's waiting until the validate has completed. - */ -#define MD_UL_OPENINPROGRESS 0x0020 /* Open in Progress */ - -/* - * Following are used with ui_tstate to specify any transient states which - * occur during metadevice operation. These are not written to the metadb as - * they do not represent a failure of the underlying metadevice. - * Transient errors are stored in the lower 16 bits and other transient - * state is stored in the upper 16 bits. - * MD_NOTOPENABLE should contain all the states that are set prior to an - * open (by snarf) and that indicate that a metadevice cannot be opened. - */ -#define MD_DEV_ERRORED 0x0000ffff /* ui_tstate error bits */ -#define MD_EOF_METADEVICE 0x00000001 /* EOF'd metadevice */ -#define MD_64MD_ON_32KERNEL 0x00000002 /* 64bit metadev on 32bit kernel */ -#define MD_INACCESSIBLE 0x00000004 /* metadevice unavailable */ -#define MD_RETRYING 0x00010000 /* retrying errored failfast I/O */ -#define MD_OPENLOCKED 0x00020000 /* MN: open locked before removing */ -#define MD_ERR_PENDING 0x00040000 /* MN: error pending */ -#define MD_ABR_CAP 0x00080000 /* MN: Application Based Recovery */ -#define MD_DMR_CAP 0x00100000 /* MN: Directed Mirror Read */ -#define MD_RELEASE_IOERR_DONE 0x00200000 /* ioerr console message done */ -#define MD_RESYNC_NOT_DONE 0x00400000 /* resync not done yet */ - -/* A metadevice cannot be opened when these states are set */ -#define MD_NOTOPENABLE (MD_EOF_METADEVICE|MD_64MD_ON_32KERNEL) - -typedef struct md_ioctl_lock { - int l_flags; /* locks held */ - mdi_unit_t *l_ui; /* unit for which lock is held */ -} md_ioctl_lock_t; - -#define MD_MASTER_DROPPED 0x0001 -#define MD_READER_HELD 0x0002 -#define MD_WRITER_HELD 0x0004 -#define MD_IO_HELD 0x0008 -#define MD_ARRAY_READER 0x0010 -#define MD_ARRAY_WRITER 0x0020 -#define STALE_OK 0x0100 -#define NO_OLD 0x0200 -#define NO_LOCK 0x0400 -#define MD_MT_IOCTL 0x80000 /* MD_GBL_IOCTL_LOCK not set */ -#define IOLOCK md_ioctl_lock_t - -#define WR_LOCK MD_WRITER_HELD -#define RD_LOCK MD_READER_HELD | STALE_OK -#define ARRAY_WRITER MD_ARRAY_WRITER -#define ARRAY_READER MD_ARRAY_READER -#define WRITERS MD_WRITER_HELD | MD_IO_HELD | MD_ARRAY_WRITER -#define READERS RD_LOCK | MD_ARRAY_READER - -#define IOLOCK_RETURN_IOCTLEND(code, lock) \ - md_ioctl_lock_exit((code), (lock)->l_flags, (lock)->l_ui, TRUE) - -#define IOLOCK_RETURN(code, lock) \ - md_ioctl_lock_exit((code), (lock)->l_flags, (lock)->l_ui, FALSE) - -#define IOLOCK_RETURN_RELEASE(code, lock) \ - md_ioctl_releaselocks((code), (lock)->l_flags, (lock)->l_ui) - -#define IOLOCK_RETURN_REACQUIRE(lock) \ - md_ioctl_reacquirelocks((lock)->l_flags, (lock)->l_ui) - -#define IOLOCK_INIT(lock) bzero((caddr_t)(lock), sizeof (*(lock))) -/* - * checks to be sure locks are held - */ -#define UNIT_WRITER_HELD(un) \ - (MDI_UNIT(MD_SID(un))->ui_lock & MD_UL_WRITER) -#define UNIT_READER_HELD(un) \ - (MDI_UNIT(MD_SID(un))->ui_readercnt != 0) -#define IO_WRITER_HELD(un) \ - (MDI_UNIT(MD_SID(un))->ui_io_lock->io_lock & MD_UL_WRITER) -#define IO_READER_HELD(un) \ - (MDI_UNIT(MD_SID(un))->ui_io_lock->io_readercnt != 0) - -#ifdef DEBUG -#define STAT_INC(statvar) \ - statvar++ -#define STAT_DEC(statvar) \ - statvar-- -#define STAT_ZERO(statvar) \ - statvar = 0; -#define STAT_MAX(statmax, statvar) \ - { \ - statvar++; \ - if (statvar > statmax) \ - statmax = statvar; \ - } -#define STAT_CHECK(statvar, value) \ - { \ - if (value) \ - statvar++; \ - } -#else -#define STAT_INC(statvar) -#define STAT_DEC(statvar) -#define STAT_ZERO(statvar) -#define STAT_MAX(statmax, statvar) -#define STAT_CHECK(statvar, value) -#endif -/* - * bit map related macros - */ -#define setbit(a, i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) -#define clrbit(a, i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY))) -#define isset(a, i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) -#define isclr(a, i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) - -typedef struct daemon_queue { - int maxq_len; - int qlen; - int treqs; /* total number of requests */ - struct daemon_queue *dq_next; - struct daemon_queue *dq_prev; - void (*dq_call)(); -} daemon_queue_t; - -#define DAEMON_QUEUE daemon_queue_t dq; - -#ifdef _KERNEL -#include -#include -#include - -#define MD_DEV2SET(d) (MD_MIN2SET(md_getminor(d))) - -#define MD_UNIT(m) (md_set[MD_MIN2SET(m)].s_un[MD_MIN2UNIT(m)]) -#define MDI_UNIT(m) ((mdi_unit_t *) \ - md_set[MD_MIN2SET(m)].s_ui[MD_MIN2UNIT(m)]) -#define MD_VOIDUNIT(m) (md_set[MD_MIN2SET(m)].s_un[MD_MIN2UNIT(m)]) -#define MDI_VOIDUNIT(m) (md_set[MD_MIN2SET(m)].s_ui[MD_MIN2UNIT(m)]) - -/* - * This is the current maximum number of real disks per Virtual Disk. - */ -extern uint_t md_mdelay; /* md_mirror timeout delay */ - -#define MD_ADM_MINOR L_MAXMIN32 /* the minor number for md_admin */ -#define MD_MDELAY (md_mdelay) -#define NUM_USEC_IN_SEC 1000000 /* 1 million usec in a second */ - -#define ANY_SERVICE -1 /* md_get_named_service() wild card */ - -/* - * daemon threads are used in multiple places in md. The following set of - * structures and routines allow a common way to create and initialize them. - * - * md_requestq_entry_t - entry of creating request queues. - * struct mdq_anchor - request queue header - * - * Functions associated with request queues: - * - * int init_requestq_entry - - * void daemon_request - put a request on the queue. - */ - -typedef struct md_requestq_entry { - struct mdq_anchor *dispq_headp; - int *num_threadsp; /* threads servicing the queue */ -} md_requestq_entry_t; - -#define NULL_REQUESTQ_ENTRY(rqp)\ - ((rqp)->dispq_headp == NULL || (rqp)->num_threadsp == NULL) - -/* this typedef is used to differentiate between the two call styles */ -typedef enum callstyle { - REQ_OLD, - REQ_NEW -} callstyle_t; - - -#define daemon_request_new daemon_request - -typedef struct mdq_anchor { - DAEMON_QUEUE - kcondvar_t a_cv; /* Request has been put on queue */ - kmutex_t a_mx; -} mdq_anchor_t; - -typedef struct daemon_request { - DAEMON_QUEUE - kmutex_t dr_mx; - int dr_pending; - timeout_id_t dr_timeout_id; -} daemon_request_t; - -typedef struct sv_dev { - set_t setno; - side_t side; - mdkey_t key; -} sv_dev_t; - -/* - * Types of device probes - */ - - -typedef struct probe_req { - DAEMON_QUEUE - minor_t mnum; /* mnum of the metadevice to probe */ - void *private_handle; /* private handle */ - intptr_t (*probe_fcn)(); /* type of probeing to be done */ -} probe_req_t; - -/* Global flags */ -#define MD_NO_GBL_LOCKS_HELD 0x0000 /* currently holding no global locks */ -#define MD_GBL_DAEMONS_LIVE 0x0001 /* master daemon has been started. */ -#define MD_GBL_DAEMONS_DIE 0x0002 -#define MD_GBL_HALTED 0x0004 /* driver is shut down */ - -/* Available bit was GBL_STALE 0x0008 */ - -#define MD_GBL_IOCTL_LOCK 0x0010 /* single-threads ioctls */ -#define MD_GBL_HS_LOCK 0x0020 /* single-threads hotspares */ -#define MD_GBL_OPEN 0x0040 /* admin is open */ -#define MD_GBL_EXCL 0x0080 /* admin is open exclusively */ - -#define MD_OFLG_NULL 0x0000 /* Null flag */ -#define MD_OFLG_CONT_ERRS 0x0001 /* Continue on open errors */ -#define MD_OFLG_PROBEDEV 0x0002 /* force a simulated open */ -#define MD_OFLG_ISINIT 0x0004 /* raid initialization */ -#define MD_OFLG_FROMIOCTL 0x0008 /* Called from an ioctl handler */ - - -typedef struct md_named_services { - - intptr_t (*md_service)(); - char *md_name; -} md_named_services_t; - -typedef enum md_snarfcmd {MD_SNARF_CLEANUP, MD_SNARF_DOIT} md_snarfcmd_t; - -typedef struct md_ops { - int (*md_open)( - dev_t *devp, - int flag, - int otyp, - cred_t *credp, - int md_oflags); - int (*md_close)( - dev_t dev, - int flag, - int otyp, - cred_t *credp, - int md_oflags); - void (*md_strategy)( - buf_t *bufp, - int flag, - void *private); - int (*md_print)(); /* unused now */ - int (*md_dump)( - dev_t dev, - caddr_t addr, - daddr_t blkno, - int nblk); - int (*md_read)( - dev_t dev, - struct uio *uiop, - cred_t *credp); - int (*md_write)( - dev_t dev, - struct uio *uiop, - cred_t *credp); - int (*md_ioctl)( - dev_t dev, - int cmd, - void *data, - int mode, - IOLOCK *lockp); - int (*md_snarf)( - md_snarfcmd_t cmd, - set_t setno); - int (*md_halt)(); - int (*md_aread)( - dev_t dev, - struct aio_req *aiop, - cred_t *credp); - int (*md_awrite)( - dev_t dev, - struct aio_req *aiop, - cred_t *credp); - int (*md_imp_set)( - set_t setno); - md_named_services_t *md_services; - md_krwlock_t md_link_rw; - md_link_t *md_head; - /* - * NOTE: when TSlvm s10/onnv compatibility is not an issue: - * o md_modid and md_locked should be deleted. - * o md_mod should be added - * ddi_modhandle_t md_mod; - * and used instead of the md_mods array (md_mods should - * be deleted). - */ - int md_modid; - int md_locked; - int md_selfindex; - struct md_ops *md_next; - md_driver_t md_driver; - /* NOTE: TSlvm depends on offsets in and sizeof this structure */ -} md_ops_t; - -/* macro to generate linkage for a md misc plugin module */ -#define md_noop -#define MD_PLUGIN_MISC_MODULE(desc, init_init, fini_uninit) \ - static struct modlmisc modlmisc = { \ - &mod_miscops, "Solaris Volume Manager " desc \ - }; \ - static struct modlinkage modlinkage = { \ - MODREV_1, (void *)&modlmisc, NULL \ - }; \ - int \ - _init(void) \ - { \ - int i; \ - init_init; \ - if ((i = mod_install(&modlinkage)) != 0) { \ - fini_uninit; \ - } \ - return (i); \ - } \ - int \ - _fini() \ - { \ - int i; \ - if ((i = mod_remove(&modlinkage)) == 0) { \ - fini_uninit; \ - } \ - return (i); \ - } \ - int \ - _info(struct modinfo *modinfop) \ - { \ - return (mod_info(&modlinkage, modinfop)); \ - } - -typedef enum md_haltcmd {MD_HALT_ALL, MD_HALT_CHECK, MD_HALT_DOIT, - MD_HALT_CLOSE, MD_HALT_OPEN, MD_HALT_UNLOAD -} md_haltcmd_t; - -/* - * To support cpr (Energy Star) we need to know when the resync threads are - * running to not allow suspention. - */ -typedef struct md_resync_thds_cnt { - int md_raid_resync; /* count of active raid resync threads */ - int md_mirror_resync; /* count of active mirror resync threads */ - kmutex_t md_resync_mutex; /* protects both resync counts */ -} md_resync_t; - -/* - * flags used with call to individual strategy routines - */ -#define MD_STR_PASSEDON 0x0000ffff -#define MD_STR_NOTTOP 0x00000001 -#define MD_STR_MAPPED 0x00000002 /* set when buf_t is mapped in */ -#define MD_STR_ABR 0x00000004 /* use ABR to handle any recovery */ -#define MD_STR_WMUPDATE 0x00000008 /* set if updating watermarks for sp */ -#define MD_IO_COUNTED 0x00000400 /* io has been counted */ -#define MD_NOBLOCK 0x00000800 /* do not block io durring release */ - -#define MD_STR_WAR 0x00010000 /* this write is write after read */ -#define MD_STR_WOW 0x00020000 /* handling a write-on-write */ -#define MD_STR_DMR 0x00040000 /* Directed Read request */ -#define MD_STR_DIRTY_RD 0x00080000 /* Read of a dirty block */ -#define MD_STR_FLAG_ERR 0x00100000 /* Flag any write error on this i/o */ -#define MD_STR_BLOCK_OK 0x00200000 /* Flag if caller i/o can be blocked */ - -/* - * Bits for return value of md_getdevnum - */ -#define MD_TRUST_DEVT 1 -#define MD_NOTRUST_DEVT 0 - -/* Flag for drivers to pass to kmem_cache_alloc() */ -#define MD_ALLOCFLAGS (KM_PUSHPAGE | KM_SLEEP) - -/* Named services */ -#define MD_CHECK_OFFLINE "check_offline" -#define MD_INC_ABR_COUNT "inc abr count" -#define MD_DEC_ABR_COUNT "dec abr count" - -/* md_getdevname_common flags for namespace lock */ -#define MD_WAIT_LOCK 0 -#define MD_NOWAIT_LOCK 1 - -/* Externals from md.c */ -extern int md_snarf_db_set(set_t setno, md_error_t *ep); -extern void get_info(struct dk_cinfo *, minor_t); -extern void get_minfo(struct dk_minfo *, minor_t); -extern int mdstrategy(buf_t *); -extern int md_create_minor_node(set_t, minor_t); -extern void md_nblocks_set(minor_t mnum, uint64_t nblocks); - -/* External from md_subr.c */ -extern int md_inc_iocount(set_t); -extern void md_inc_iocount_noblock(set_t); -extern void md_dec_iocount(set_t); -extern int md_isblock_setio(set_t); -extern int md_block_setio(set_t); -extern void md_clearblock_setio(set_t); -extern void md_unblock_setio(set_t); -extern int md_tas_block_setio(set_t); -extern void md_biodone(struct buf *); -extern void md_bioreset(struct buf *); -extern md_dev64_t md_xlate_targ_2_mini(md_dev64_t); -extern md_dev64_t md_xlate_mini_2_targ(md_dev64_t); -extern void md_xlate_free(int); -extern major_t md_targ_name_to_major(char *); -extern char *md_targ_major_to_name(major_t); -extern void md_majortab_free(); -extern void md_set_status(int); -extern void md_clr_status(int); -extern int md_get_status(void); -extern void md_set_setstatus(set_t, int); -extern void md_clr_setstatus(set_t, int); -extern uint_t md_get_setstatus(set_t); -extern void *md_unit_readerlock(mdi_unit_t *); -extern void *md_unit_writerlock(mdi_unit_t *); -extern void md_unit_readerexit(mdi_unit_t *); -extern void md_unit_writerexit(mdi_unit_t *); -extern void md_ioctl_releaselocks(int, int, mdi_unit_t *); -extern void md_ioctl_reacquirelocks(int, mdi_unit_t *); -extern int md_ioctl_lock_exit(int, int, mdi_unit_t *, int); -extern int md_ioctl_lock_enter(void); -extern void *md_ioctl_readerlock(IOLOCK *, mdi_unit_t *); -extern void md_ioctl_readerexit(IOLOCK *); -extern void *md_ioctl_writerlock(IOLOCK *, mdi_unit_t *); -extern void md_ioctl_writerexit(IOLOCK *); -extern void md_ioctl_io_exit(IOLOCK *); -extern void *md_ioctl_io_lock(IOLOCK *, mdi_unit_t *); -extern void md_ioctl_droplocks(IOLOCK *); -extern void md_array_writer(IOLOCK *); -extern void md_array_reader(IOLOCK *); -extern void *md_ioctl_openclose_enter(IOLOCK *, mdi_unit_t *); -extern void md_ioctl_openclose_exit(IOLOCK *); -extern void md_ioctl_openclose_exit_lh(IOLOCK *); -extern void *md_unit_openclose_enter(mdi_unit_t *); -extern void md_unit_openclose_exit(mdi_unit_t *); -extern void md_unit_openclose_exit_lh(mdi_unit_t *); -extern int md_unit_isopen(mdi_unit_t *ui); -extern int md_unit_incopen(minor_t mnum, int flag, int otyp); -extern int md_unit_decopen(minor_t mnum, int otyp); -extern void *md_io_readerlock(mdi_unit_t *); -extern void *md_io_writerlock(mdi_unit_t *); -extern void md_io_readerexit(mdi_unit_t *); -extern void md_io_writerexit(mdi_unit_t *); -extern intptr_t (*md_get_named_service())(); -extern int init_requestq(md_requestq_entry_t *, void (*)(), - caddr_t, int, int); -extern void daemon_request(mdq_anchor_t *, void(*)(), - daemon_queue_t *, callstyle_t); -extern void md_daemon(int, mdq_anchor_t *); -extern void mddb_commitrec_wrapper(mddb_recid_t); -extern void mddb_commitrecs_wrapper(mddb_recid_t *); -extern void mddb_deleterec_wrapper(mddb_recid_t); -extern void md_holdset_enter(set_t setno); -extern void md_holdset_exit(set_t setno); -extern int md_holdset_testandenter(set_t setno); -extern void md_haltsnarf_enter(set_t setno); -extern void md_haltsnarf_exit(set_t setno); -extern void md_haltsnarf_wait(set_t setno); -extern int md_halt_set(set_t setno, enum md_haltcmd cmd); -extern int md_halt(int global_lock_flag); -extern int md_layered_open(minor_t, md_dev64_t *, int); -extern void md_layered_close(md_dev64_t, int); -extern char *md_get_device_name(md_dev64_t); -extern int errdone(mdi_unit_t *, struct buf *, int); -extern int md_checkbuf(mdi_unit_t *, md_unit_t *, buf_t *); -extern int md_start_daemons(int init_queues); -extern int md_loadsubmod(set_t, char *, int); -extern int md_getmodindex(md_driver_t *, int, int); -extern void md_call_strategy(buf_t *, int, void *); -extern int md_call_ioctl(md_dev64_t, int, void *, int, IOLOCK *); -extern void md_rem_link(set_t, int, krwlock_t *, md_link_t **); -extern int md_dev_exists(md_dev64_t); -extern md_parent_t md_get_parent(md_dev64_t); -extern void md_set_parent(md_dev64_t, md_parent_t); -extern void md_reset_parent(md_dev64_t); -extern struct hot_spare_pool *find_hot_spare_pool(set_t, int); -extern int md_hot_spare_ifc(hs_cmds_t, mddb_recid_t, u_longlong_t, int, - mddb_recid_t *, mdkey_t *, md_dev64_t *, diskaddr_t *); -extern int md_notify_interface(md_event_cmds_t cmd, md_tags_t type, - set_t set, md_dev64_t dev, md_event_type_t event); -extern void svm_gen_sysevent(char *se_class, char *se_subclass, - uint32_t tag, set_t setno, md_dev64_t devid); -extern void md_create_unit_incore(minor_t, md_ops_t *, int); -extern void md_destroy_unit_incore(minor_t, md_ops_t *); -extern void md_rem_names(sv_dev_t *, int); -struct uio; -extern int md_chk_uio(struct uio *); -extern char *md_shortname(minor_t mnum); -extern char *md_devname(set_t setno, md_dev64_t dev, char *buf, - size_t size); -extern void md_minphys(buf_t *); -extern void md_kstat_init(minor_t mnum); -extern void md_kstat_init_ui(minor_t mnum, mdi_unit_t *ui); -extern void md_kstat_destroy(minor_t mnum); -extern void md_kstat_destroy_ui(mdi_unit_t *ui); -extern void md_kstat_waitq_enter(mdi_unit_t *ui); -extern void md_kstat_waitq_to_runq(mdi_unit_t *ui); -extern void md_kstat_waitq_exit(mdi_unit_t *ui); -extern void md_kstat_runq_enter(mdi_unit_t *ui); -extern void md_kstat_runq_exit(mdi_unit_t *ui); -extern void md_kstat_done(mdi_unit_t *ui, buf_t *bp, int war); -extern pid_t md_getpid(void); -extern proc_t *md_getproc(void); -extern int md_checkpid(pid_t pid, proc_t *proc); -extern char *md_strdup(char *cp); -extern void freestr(char *cp); -extern int md_check_ioctl_against_unit(int, mdc_unit_t); -extern mddb_recid_t md_vtoc_to_efi_record(mddb_recid_t, set_t); - -extern int mdmn_ksend_message(set_t, md_mn_msgtype_t, uint_t, - md_mn_nodeid_t, char *, int, md_mn_kresult_t *); -extern void mdmn_ksend_show_error(int, md_mn_kresult_t *, const char *); -extern int mdmn_send_capability_message(minor_t, volcap_t, IOLOCK *); -extern void mdmn_clear_all_capabilities(minor_t); -extern int md_init_probereq(struct md_probedev_impl *p, - daemon_queue_t **hdrpp); -extern boolean_t callb_md_mrs_cpr(void *, int); -extern void md_upd_set_unnext(set_t, unit_t); -extern int md_rem_selfname(minor_t); -extern void md_rem_hspname(set_t, mdkey_t); -extern void *md_create_taskq(set_t, minor_t); - -/* Externals from md_ioctl.c */ -extern int md_mn_is_commd_present(void); -extern int md_mn_is_commd_present_lite(void); -extern void md_mn_clear_commd_present(void); -extern int md_admin_ioctl(md_dev64_t, int, caddr_t, int, IOLOCK *lockp); -extern void md_get_geom(md_unit_t *, struct dk_geom *); -extern int md_set_vtoc(md_unit_t *, struct vtoc *); -extern void md_get_vtoc(md_unit_t *, struct vtoc *); -extern int md_set_extvtoc(md_unit_t *, struct extvtoc *); -extern void md_get_extvtoc(md_unit_t *, struct extvtoc *); -extern void md_get_cgapart(md_unit_t *, struct dk_map *); -extern void md_get_efi(md_unit_t *, char *); -extern int md_set_efi(md_unit_t *, char *); -extern int md_dkiocgetefi(minor_t, void *, int); -extern int md_dkiocsetefi(minor_t, void *, int); -extern int md_dkiocpartition(minor_t, void *, int); -extern void md_remove_minor_node(minor_t); - - -/* Externals from md_names.c */ -extern mdkey_t md_setdevname(set_t, side_t, mdkey_t, char *, minor_t, char *, - int imp_flag, ddi_devid_t devid, char *minorname, - set_t, md_error_t *); -extern int md_getdevname(set_t, side_t, mdkey_t, md_dev64_t, char *, - size_t); -extern int md_getdevname_common(set_t, side_t, mdkey_t, md_dev64_t, char *, - size_t, int); -extern int md_gethspinfo(set_t, side_t, mdkey_t, char *, hsp_t *, - char *); -extern int md_getkeyfromdev(set_t, side_t, md_dev64_t, mdkey_t *, int *); -extern int md_devid_found(set_t, side_t, mdkey_t); -extern int md_getnment(set_t, side_t, mdkey_t, md_dev64_t, - char *, uint_t, major_t *, minor_t *, mdkey_t *); -extern md_dev64_t md_getdevnum(set_t, side_t, mdkey_t, int); -extern mdkey_t md_getnextkey(set_t, side_t, mdkey_t, uint_t *); -extern int md_remdevname(set_t, side_t, mdkey_t); -extern mdkey_t md_setshared_name(set_t, char *, int); -extern char *md_getshared_name(set_t, mdkey_t); -extern int md_remshared_name(set_t, mdkey_t); -extern mdkey_t md_getshared_key(set_t, char *); -extern int md_setshared_data(set_t, uint_t, caddr_t); -extern caddr_t md_getshared_data(set_t, uint_t); -extern int md_load_namespace(set_t, md_error_t *ep, int); -extern void md_unload_namespace(set_t, int); -extern int md_nm_did_chkspace(set_t); -extern void md_bioinit(); -extern buf_t *md_bioclone(buf_t *, off_t, size_t, dev_t, diskaddr_t, - int (*)(buf_t *), buf_t *, int); -extern int md_getdevid(set_t setno, side_t side, mdkey_t key, - ddi_devid_t devid, ushort_t *did_size); -extern int md_getdevidminor(set_t setno, side_t side, mdkey_t key, - char *minorname, size_t minorname_len); -extern int md_update_namespace(set_t setno, side_t side, mdkey_t key, - caddr_t devname, caddr_t pathname, major_t major, - minor_t mnum); -extern int md_update_locator_namespace(set_t setno, side_t side, - caddr_t devname, caddr_t pathname, md_dev64_t devt); -extern int md_update_namespace_did(set_t setno, side_t side, mdkey_t key, - md_error_t *ep); -extern int md_validate_devid(set_t setno, side_t side, int *maxsz); -extern int md_get_invdid(set_t setno, side_t side, int cnt, int maxsz, - void *didptr); -extern md_dev64_t md_resolve_bydevid(minor_t, md_dev64_t, mdkey_t key); -extern md_dev64_t md_expldev(md_dev64_t); -extern dev32_t md_cmpldev(md_dev64_t); -extern dev_t md_dev64_to_dev(md_dev64_t); -extern md_dev64_t md_makedevice(major_t, minor_t); -extern major_t md_getmajor(md_dev64_t); -extern minor_t md_getminor(md_dev64_t); -extern void md_timeval(md_timeval32_t *); -extern int md_imp_snarf_set(mddb_config_t *); - -/* externals from md_mddb.c */ -extern int mddb_reread_rr(set_t, mddb_recid_t); -extern int mddb_setowner(mddb_recid_t id, md_mn_nodeid_t owner); -extern int mddb_parse(mddb_parse_parm_t *mpp); -extern int mddb_block(mddb_block_parm_t *mpp); -extern int mddb_optrecfix(mddb_optrec_parm_t *mop); -extern int mddb_check_write_ioctl(mddb_config_t *info); -extern int mddb_setflags_ioctl(mddb_setflags_config_t *info); -extern struct nm_next_hdr *get_first_record(set_t, int, int); -extern void *lookup_entry(struct nm_next_hdr *, set_t, - side_t, mdkey_t, md_dev64_t, int); -extern void *lookup_shared_entry(struct nm_next_hdr *, - mdkey_t key, char *, mddb_recid_t *, int); -extern int remove_shared_entry(struct nm_next_hdr *, mdkey_t key, - char *, int); -extern int remove_entry(struct nm_next_hdr *, side_t, mdkey_t, int); -extern void *alloc_entry(struct nm_next_hdr *, mddb_recid_t, size_t, int, - mddb_recid_t *); -extern void *getshared_name(set_t, mdkey_t, int); - -#endif /* _KERNEL */ - - -/* externals from md_revchk.c */ -extern int revchk(uint_t my_rev, uint_t data); - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MDVAR_H */ diff --git a/usr/src/uts/common/sys/lvm/meta_arr.x b/usr/src/uts/common/sys/lvm/meta_arr.x deleted file mode 100644 index 750c5b038bc3..000000000000 --- a/usr/src/uts/common/sys/lvm/meta_arr.x +++ /dev/null @@ -1,100 +0,0 @@ -%/* -% * Copyright 2005 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% * -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License, Version 1.0 only -% * (the "License"). You may not use this file except in compliance -% * with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%#pragma ident "%Z%%M% %I% %E% SMI" -% -%/* -% * This file has the shared fixed array RPC definitions for use in a couple -% * places. -% */ -% - -% -%/* -% * Node Name type -% */ -typedef char md_node_nm_t[MD_MAX_NODENAME_PLUS_1]; -typedef char md_mnnode_nm_t[MD_MAX_MNNODENAME_PLUS_1]; - -% -%/* -% * Set Name Type -% */ -typedef char md_set_nm_t[MD_MAX_SETNAME_PLUS_1]; - -% -%/* -% * Mediator Basic Data Types -% */ -typedef md_node_nm_t md_alias_nm_t[MAX_HOST_ADDRS]; -typedef u_int md_alias_ip_t[MAX_HOST_ADDRS]; - -#ifdef RPC_HDR -% -%/* -% * Values for the a_flg structure member of md_alias_nm_ip_t structure -% */ -%#define NMIP_F_LOCAL 0x0001 -% -#endif /* RPC_HDR */ - -struct md_hi_t { - u_int a_flg; - int a_cnt; - md_alias_nm_t a_nm; - md_alias_ip_t a_ip; -}; - -struct md_hi_arr_t { - int n_cnt; - md_hi_t n_lst[MED_MAX_HOSTS]; -}; - -struct md_h_t { - int a_cnt; - md_alias_nm_t a_nm; -}; - -struct md_h_arr_t { - int n_cnt; - md_h_t n_lst[MED_MAX_HOSTS]; -}; - -% -%/* -% * Node Name type -% */ -typedef md_node_nm_t md_node_nm_arr_t[MD_MAXSIDES]; -% -#if 0 -% -%/* -% * Node Name type with added aliases -% */ -struct md_node_nm_arr_t { - int n_cnt; - md_h_t n_lst[MD_MAXSIDES]; -}; -#endif /* 0 */ diff --git a/usr/src/uts/common/sys/lvm/meta_basic.x b/usr/src/uts/common/sys/lvm/meta_basic.x deleted file mode 100644 index 502d5d83a4b6..000000000000 --- a/usr/src/uts/common/sys/lvm/meta_basic.x +++ /dev/null @@ -1,337 +0,0 @@ -%/* -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License (the "License"). -% * You may not use this file except in compliance with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%/* -% * Copyright 2009 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% */ -% - -%/* get timeval32 definition */ -%#include - -#ifndef _KERNEL -%#ifdef _KERNEL -%#error "Compiling kernel file rpcgened without _KERNEL define." -%#endif /* _KERNEL */ -#endif /* _KERNEL */ -%#include - -#ifdef RPC_XDR -#ifndef _KERNEL -%bool_t -%xdr_uint_t(XDR *xdrs, uint_t *objp) -%{ -% if (!xdr_u_int(xdrs, (u_int *)objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_ushort_t(XDR *xdrs, ushort_t *objp) -%{ -% if (!xdr_u_short(xdrs, (u_short *)objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_dev_t(XDR *xdrs, dev_t *objp) -%{ -% if (!xdr_u_int(xdrs, (u_int *)objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_dev32_t(XDR *xdrs, dev32_t *objp) -%{ -% if (!xdr_u_int(xdrs, (u_int *)objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_md_dev64_t(XDR *xdrs, md_dev64_t *objp) -%{ -% if (!xdr_u_longlong_t(xdrs, objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_size_t(XDR *xdrs, size_t *objp) -%{ -% if (!xdr_u_int(xdrs, (u_int *) objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_daddr_t(XDR *xdrs, daddr_t *objp) -%{ -% if (!xdr_int(xdrs, (int *) objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_daddr32_t(XDR *xdrs, daddr32_t *objp) -%{ -% if (!xdr_int(xdrs, (int *) objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_diskaddr_t(XDR *xdrs, diskaddr_t *objp) -%{ -% if (!xdr_u_longlong_t(xdrs, objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_ddi_devid_t(XDR *xdrs, ddi_devid_t *objp) -%{ -% /* device ids not supported for non-local sets */ -% return (TRUE); -%} -%bool_t -%xdr_off_t(XDR *xdrs, off_t *objp) -%{ -% if (!xdr_int(xdrs, (int *) objp)) -% return (FALSE); -% return (TRUE); -%} -%bool_t -%xdr_timeval(XDR *xdrs, struct timeval *objp) -%{ -% if (!xdr_int(xdrs, (int *)&objp->tv_sec)) -% return (FALSE); -% if (!xdr_int(xdrs, (int *)&objp->tv_usec)) -% return (FALSE); -% return (TRUE); -%} -% -%bool_t -%xdr_md_timeval32_t(XDR *xdrs, md_timeval32_t *objp) -%{ -% if (!xdr_int(xdrs, &objp->tv_sec)) -% return (FALSE); -% if (!xdr_int(xdrs, &objp->tv_usec)) -% return (FALSE); -% return (TRUE); -%} -% -#else /* _KERNEL */ -%#ifdef _LP64 -%bool_t -%xdr_timeval(XDR *xdrs, struct timeval *objp) -%{ -% struct timeval32 tv32; -% if (xdrs->x_op == XDR_ENCODE) -% TIMEVAL_TO_TIMEVAL32(&tv32, objp); -% if (!xdr_int(xdrs, &tv32.tv_sec)) -% return (FALSE); -% if (!xdr_int(xdrs, &tv32.tv_usec)) -% return (FALSE); -% if (xdrs->x_op == XDR_DECODE) -% TIMEVAL32_TO_TIMEVAL(objp, &tv32); -% return (TRUE); -%} -%#else /* !_LP64 */ -%bool_t -%xdr_timeval(XDR *xdrs, struct timeval *objp) -%{ -% if (!xdr_int(xdrs, (int *)&objp->tv_sec)) -% return (FALSE); -% if (!xdr_int(xdrs, (int *)&objp->tv_usec)) -% return (FALSE); -% return (TRUE); -%} -%#endif /* _LP64 */ -#endif /* !_KERNEL */ -% -%bool_t -%xdr_minor_t(XDR *xdrs, minor_t *objp) -%{ -% if (!xdr_u_int(xdrs, (u_int *)objp)) -% return (FALSE); -% return (TRUE); -%} -% -%bool_t -%xdr_clnt_stat(XDR *xdrs, enum clnt_stat *objp) -%{ -% if (!xdr_enum(xdrs, (enum_t *)objp)) -% return (FALSE); -% return (TRUE); -%} -#endif /* RPC_XDR */ - -#ifdef RPC_HDR -% -%/* -% * Some constants -% */ -const MD_MAX_SETNAME = 50; -const MD_MAX_NODENAME = 63; -const MAX_HOST_ADDRS = 3; -const MD_MAX_MNNODENAME = 256; - -const MED_MAX_HOSTS = 3; -const MED_DEF_HOSTS = 3; - -const MD_MAXSIDES = 8; -const MD_LOCAL_SET = 0; - -const MD_MNMAXSIDES = 128; -const MDDB_SN_LEN = 12; -const MDDB_MINOR_NAME_MAX = 32; -const MD_MAXDRVNM = 16; - -const MD_MAX_BLKS_FOR_SMALL_DEVS = 2147483647; -%#define MD_MAX_BLKS_FOR_EXTVTOC 4294967295ULL -% -%/* Minimum number of metadevice database replicas needed */ -const MD_MINREPLICAS = 1; - -%#define MD_MAX_SETNAME_PLUS_1 (MD_MAX_SETNAME + 1) -%#define MD_MAX_NODENAME_PLUS_1 (MD_MAX_NODENAME + 1) -%#define MD_MAX_MNNODENAME_PLUS_1 (MD_MAX_MNNODENAME + 1) -% -%#define MD_SET_BAD ((set_t)~0UL) -% -%#define MD_LOCAL_NAME "" -% -%#define MD_SIDEWILD ((side_t)~0UL) -% -%#define MD_KEYWILD ((mdkey_t)0) -%#define MD_KEYBAD ((mdkey_t)~0UL) -%#define MD_UNITBAD ((unit_t)~0UL) -%#define MD_HSPID_WILD ((hsp_t)~0UL) - -%/* Maximum length of a metadevice name */ -%#define MD_MAX_SIDENAME_LEN (MD_MAXDRVNM + MD_MAX_SETNAME + 2) -% -%/* -% * dev_t is 64 bit now across userland and kernel. Whereever 32 bit value -% * is specifically needed, dev32_t will be used. Internally dev_t is used. -% * timeval is always 32 bit across userland and kernel. -% */ -%typedef u_longlong_t md_dev64_t; -%typedef struct timeval32 md_timeval32_t; -% -%/* -% * The following definitions are not available, when operating in -% * a 32 bit environment. As we are always dealing with -% * 64 bit devices, md_dev64_t, we need those definitions also in -% * a 32 bit environment -% */ -%#ifndef NBITSMAJOR64 -%#define NBITSMAJOR64 32 /* # of major device bits in 64-bit Solaris */ -%#endif /* NBITSMAJOR64 */ -% -%#ifndef NBITSMINOR64 -%#define NBITSMINOR64 32 /* # of minor device bits in 64-bit Solaris */ -%#endif /* NBITSMINOR64 */ -% -%#ifndef MAXMAJ64 -%#define MAXMAJ64 0xfffffffful /* max major value */ -%#endif /* MAXMAJ64 */ -% -%#ifndef MAXMIN64 -%#define MAXMIN64 0xfffffffful /* max minor value */ -%#endif /* MAXMIN64 */ -% -%#ifndef NODEV64 -%#define NODEV64 0xffffffffffffffffuLL -%#endif /* NODEV64 */ -% -%#ifndef NODEV32 -%#define NODEV32 0xffffffffuL -%#endif /* NODEV32 */ -% -%#ifndef MD_DISKADDR_ERROR -%#define MD_DISKADDR_ERROR 0xffffffffffffffffuLL -%#endif /* MD_DISKADDR_ERROR */ - -#endif /* RPC_HDR */ - -#if defined(RPC_HDR) || defined(RPC_XDR) -% -%/* namespace key */ -typedef int mdkey_t; - -% -%/* set ID */ -typedef u_int set_t; - -% -%/* record ID type */ -typedef int mddb_recid_t; - -% -%/* side ID */ -typedef u_int side_t; - -% -%/* Multi-node node ID */ -typedef uint32_t md_mn_nodeid_t; - -% -%/* Shared definitions */ -#include "meta_arr.x" - -#endif /* defined(RPC_HDR) || defined(RPC_XDR) */ - -#ifdef RPC_HDR -% -%#if defined(__STDC__) || defined(__cplusplus) -#ifndef _KERNEL -%extern bool_t xdr_uint_t(XDR *xdrs, uint_t *objp); -%extern bool_t xdr_ushort_t(XDR *xdrs, ushort_t *objp); -%extern bool_t xdr_dev_t(XDR *xdrs, dev_t *objp); -%extern bool_t xdr_dev32_t(XDR *xdrs, dev32_t *objp); -%extern bool_t xdr_md_dev64_t(XDR *xdrs, md_dev64_t *objp); -%extern bool_t xdr_size_t(XDR *xdrs, size_t *objp); -%extern bool_t xdr_daddr_t(XDR *xdrs, daddr_t *objp); -%extern bool_t xdr_daddr32_t(XDR *xdrs, daddr32_t *objp); -%extern bool_t xdr_diskaddr_t(XDR *xdrs, diskaddr_t *objp); -%extern bool_t xdr_ddi_devid_t(XDR *xdrs, ddi_devid_t *objp); -%extern bool_t xdr_off_t(XDR *xdrs, off_t *objp); -%extern bool_t xdr_md_timeval32_t(XDR *xdrs, md_timeval32_t *objp); -#endif /* !_KERNEL */ -%extern bool_t xdr_minor_t(XDR *xdrs, minor_t *objp); -%extern bool_t xdr_timeval(XDR *xdrs, struct timeval *objp); -%extern bool_t xdr_clnt_stat(XDR *xdrs, enum clnt_stat *objp); -%#else /* K&R C */ -#ifndef _KERNEL -%extern bool_t xdr_uint_t(); -%extern bool_t xdr_ushort_t(); -%extern bool_t xdr_dev_t(); -%extern bool_t xdr_dev32_t(); -%extern bool_t xdr_md_dev64_t(); -%extern bool_t xdr_size_t(); -%extern bool_t xdr_daddr_t(); -%extern bool_t xdr_daddr32_t(); -%extern bool_t xdr_diskaddr_t(); -%extern bool_t xdr_ddi_devid_t(); -%extern bool_t xdr_off_t(); -%extern bool_t xdr_md_timeval32_t(); -#endif /* !_KERNEL */ -%extern bool_t xdr_minor_t(); -%extern bool_t xdr_timeval(); -%extern bool_t xdr_clnt_stat(); -%#endif /* K&R C */ -#endif /* RPC_HDR */ diff --git a/usr/src/uts/common/sys/lvm/metamed.x b/usr/src/uts/common/sys/lvm/metamed.x deleted file mode 100644 index 51582310a01b..000000000000 --- a/usr/src/uts/common/sys/lvm/metamed.x +++ /dev/null @@ -1,277 +0,0 @@ -%/* -% * Copyright 2005 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% * -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License, Version 1.0 only -% * (the "License"). You may not use this file except in compliance -% * with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%#pragma ident "%Z%%M% %I% %E% SMI" -% -%#include -%#include -%#include -#ifndef _KERNEL -%#include -#endif -% -%#include - -#ifdef RPC_SVC -% -%#include -#endif /* RPC_SVC */ - -% -%/* -% * mediator (med) errors, definition of MDE_MED_HOSTNOMED must be changed -% * when new errors are added, since MDE_MED_NOERROR has to come out to -% * be zero! -% */ -enum md_med_errno_t { - MDE_MED_HOSTNOMED = -16, - MDE_MED_DBNOTINIT, - MDE_MED_DBSZBAD, - MDE_MED_DBKEYADDFAIL, - MDE_MED_DBKEYDELFAIL, - MDE_MED_DBHDRSZBAD, - MDE_MED_DBHDRMAGBAD, - MDE_MED_DBHDRREVBAD, - MDE_MED_DBHDRCKSBAD, - MDE_MED_DBRECSZBAD, - MDE_MED_DBRECMAGBAD, - MDE_MED_DBRECREVBAD, - MDE_MED_DBRECCKSBAD, - MDE_MED_DBRECOFFBAD, - MDE_MED_DBRECNOENT, - MDE_MED_DBARGSMISMATCH, - MDE_MED_NOERROR -}; - -struct med_err_t { - int med_errno; /* errno or negative error code */ - string med_node<>; /* associated node */ - string med_misc<>; /* misc text */ -}; - -#ifdef RPC_HDR -% -%/* -% * Null error structure initializer. -% */ -%#define MED_NULL_ERR { 0, NULL, NULL } -%#define MD_MED_DEF_TO {2, 0} /* 2 seconds */ -%#define MD_MED_PMAP_TO {5, 0} /* 5 seconds */ -% -%/* -% * Mediator Magic Number and Data Revision String -% */ -%#define MED_DATA_MAGIC 0x6d656461 -%#define MED_DATA_REV 0x10000000 -% -%#define MED_REC_MAGIC 0x6d657265 -%#define MED_REC_REV 0x10000000 -% -%#define MED_DB_MAGIC 0x6d656462 -%#define MED_DB_REV 0x10000000 -% -%#define METAETCDIR "/etc/lvm/" -%#define MED_DB_FILE METAETCDIR "meddb" -% -%extern char *med_errnum_to_str(int errnum); -#endif /* RPC_HDR */ - -%/* Mediator records in MN diskset have all callers set to multiowner */ -%#define MED_MN_CALLER "multiowner" -% - -#ifdef RPC_XDR -% -%/* Start - Avoid duplicate definitions, but get the xdr calls right */ -%#if 0 -#include "meta_arr.x" -%#endif /* 0 */ -%/* End - Avoid duplicate definitions, but get the xdr calls right */ -% -#endif /* RPC_XDR */ - -#ifdef RPC_HDR -struct med_db_hdr_t { - u_int med_dbh_mag; - u_int med_dbh_rev; - u_int med_dbh_cks; - u_int med_dbh_nm; -}; - -% -%/* -% * Flags for the mediator data -% */ -% -%#define MED_DFL_GOLDEN 0x0001 -%#define MED_DFL_ERROR 0x0002 -% -#endif /* RPC_HDR */ - -% -struct med_data_t { - u_int med_dat_mag; - u_int med_dat_rev; - u_int med_dat_cks; - u_int med_dat_fl; - u_int med_dat_cc; - set_t med_dat_sn; - struct timeval med_dat_id; - int med_dat_spare; -}; - -#ifdef RPC_HDR -% -%/* -% * List of mediator data -% */ -% -struct med_data_lst_t { - med_data_lst_t *mdl_nx; - med_data_t *mdl_med; -}; - -% -%/* -% * Flags for the mediator record -% */ -% -%#define MED_RFL_DEL 0x0001 -% -#endif /* RPC_HDR */ - -% -#ifndef _KERNEL -struct med_rec_t { - u_int med_rec_mag; - u_int med_rec_rev; - u_int med_rec_cks; - u_int med_rec_fl; - set_t med_rec_sn; - md_set_nm_t med_rec_snm; - md_node_nm_arr_t med_rec_nodes; - md_h_arr_t med_rec_meds; - med_data_t med_rec_data; - off_t med_rec_foff; -}; -#endif /* !_KERNEL */ - -struct med_med_t { - set_t med_setno; - string med_setname<>; - string med_caller<>; -}; - -struct med_args_t { - med_med_t med; -}; - -struct med_res_t { - med_err_t med_status; - med_med_t med; -}; - -struct med_get_data_res_t { - med_err_t med_status; - med_data_t med_data; -}; - -struct med_upd_data_args_t { - med_med_t med; - med_data_t med_data; -}; - -#ifndef _KERNEL -struct med_get_rec_res_t { - med_err_t med_status; - med_med_t med; - med_rec_t med_rec; -}; - -struct med_upd_rec_args_t { - u_int med_flags; - med_med_t med; - med_rec_t med_rec; -}; -#endif /* !_KERNEL */ - -struct med_hnm_res_t { - med_err_t med_status; - string med_hnm<>; -}; - -#ifdef RPC_XDR -% -%/* -% * Constant null error struct. -% */ -%const med_err_t med_null_err = MED_NULL_ERR; -%const struct timeval md_med_def_timeout = MD_MED_DEF_TO; -%const struct timeval md_med_pmap_timeout = MD_MED_PMAP_TO; -#endif /* RPC_XDR */ - -#ifdef RPC_HDR -% -%/* -% * External reference to constant null error struct. (declared in med_xdr.c) -% */ -%extern const med_err_t med_null_err; -%extern const struct timeval md_med_def_timeout; -%extern const struct timeval md_med_pmap_timeout; -% -%/* -% * Some useful defines -% */ -%#define MED_SERVNAME "rpc.metamedd" -%#define MED_SVC "metamed" -% -%/* -% * authorization info -% */ -const MED_GID = 14; /* mag sysadmin group */ -#endif /* RPC_HDR */ - -program MED_PROG { - version MED_VERS { - med_err_t MED_NULL(void) = 0; - med_err_t MED_UPD_DATA(med_upd_data_args_t) = 1; - med_get_data_res_t MED_GET_DATA(med_args_t) = 2; -#ifndef _KERNEL - med_err_t MED_UPD_REC(med_upd_rec_args_t) = 3; - med_get_rec_res_t MED_GET_REC(med_args_t) = 4; -#endif - med_hnm_res_t MED_HOSTNAME(void) = 5; - } = 1; -} = 100242; - -#ifdef RPC_HDR -#ifdef _KERNEL -% -%extern int upd_med_hosts(md_hi_arr_t *mp, char *setname, -% med_data_t *meddp, char *caller); -%extern med_data_lst_t *get_med_host_data(md_hi_arr_t *mp, char *setname, -% set_t setno); -#endif /* ! _KERNEL */ -#endif /* RPC_HDR */ diff --git a/usr/src/uts/common/sys/lvm/mhdx.x b/usr/src/uts/common/sys/lvm/mhdx.x deleted file mode 100644 index c55f3c24af24..000000000000 --- a/usr/src/uts/common/sys/lvm/mhdx.x +++ /dev/null @@ -1,96 +0,0 @@ -%/* -% * Copyright 2005 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% * -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License, Version 1.0 only -% * (the "License"). You may not use this file except in compliance -% * with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -%#pragma ident "%Z%%M% %I% %E% SMI" -% -%/* -% * MH shadow structure for struct mhioctkown (sys/mhd.h) -% */ -struct mhd_mhioctkown_t { - int reinstate_resv_delay; - int min_ownership_delay; - int max_ownership_delay; -}; - -% -%/* -% * MH timeout values -% */ -struct mhd_mhiargs_t { - int mh_ff; - mhd_mhioctkown_t mh_tk; -}; - -% -%/* -% * controller info -% */ -#ifdef RPC_HDR -% -%#define METACTLRMAP "/etc/lvm/md.ctlrmap" -%#define META_SSA200_PID "SSA200" -#endif /* RPC_HDR */ -enum mhd_ctlrtype_t { - MHD_CTLR_GENERIC = 0, - MHD_CTLR_SSA100, - MHD_CTLR_SSA200 -}; - -struct mhd_cinfo_t { - mhd_ctlrtype_t mhc_ctype; /* controller type */ - u_int mhc_tray; /* SSA100 tray */ - u_int mhc_bus; /* SSA100 bus */ - u_longlong_t mhc_wwn; /* SSA100 World Wide Name */ -}; - -% -%/* -% * unique drive identifier -% */ -typedef u_int mhd_did_flags_t; -#ifdef RPC_HDR -% -%#define MHD_DID_TIME 0x0001 -%#define MHD_DID_SERIAL 0x0002 -%#define MHD_DID_CINFO 0x0004 -%#define MHD_DID_DUPLICATE 0x0008 -#endif /* RPC_HDR */ -typedef char mhd_serial_t[40]; /* SCSI VID+PID+REV+SERIAL */ -struct mhd_drive_id_t { - mhd_did_flags_t did_flags; - long did_time; /* vtoc timestamp (time_t) */ - mhd_serial_t did_serial; /* SCSI serial number */ - mhd_cinfo_t did_cinfo; /* controller info */ -}; - -% -%/* -% * drive identifier list -% */ -struct mhd_drive_info_t { - string dif_name<>; - mhd_drive_id_t dif_id; -}; -typedef mhd_drive_info_t mhd_drive_info_list_t<>; diff --git a/usr/src/uts/common/sys/sysevent/eventdefs.h b/usr/src/uts/common/sys/sysevent/eventdefs.h index 25401cec5304..bed54d0f24bf 100644 --- a/usr/src/uts/common/sys/sysevent/eventdefs.h +++ b/usr/src/uts/common/sys/sysevent/eventdefs.h @@ -18,9 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. */ #ifndef _SYS_SYSEVENT_EVENTDEFS_H @@ -60,67 +61,6 @@ extern "C" { */ #define EC_CLUSTER "EC_Cluster" -/* - * The following classes are exclusively reserved for use by the - * Solaris Volume Manager (SVM) - */ -#define EC_SVM_CONFIG "EC_SVM_Config" -#define EC_SVM_STATE "EC_SVM_State" - -/* - * EC_SVM_CONFIG subclass definitions - supporting attributes (name/value pairs) - * are found in sys/sysevent/svm.h - */ -#define ESC_SVM_CREATE "ESC_SVM_Create" -#define ESC_SVM_DELETE "ESC_SVM_Delete" -#define ESC_SVM_ADD "ESC_SVM_Add" -#define ESC_SVM_REMOVE "ESC_SVM_Remove" -#define ESC_SVM_REPLACE "ESC_SVM_Replace" -#define ESC_SVM_GROW "ESC_SVM_Grow" -#define ESC_SVM_RENAME_SRC "ESC_SVM_Rename_Src" -#define ESC_SVM_RENAME_DST "ESC_SVM_Rename_Dst" -#define ESC_SVM_MEDIATOR_ADD "ESC_SVM_Mediator_Add" -#define ESC_SVM_MEDIATOR_DELETE "ESC_SVM_Mediator_Delete" -#define ESC_SVM_HOST_ADD "ESC_SVM_Host_Add" -#define ESC_SVM_HOST_DELETE "ESC_SVM_Host_Delete" -#define ESC_SVM_DRIVE_ADD "ESC_SVM_Drive_Add" -#define ESC_SVM_DRIVE_DELETE "ESC_SVM_Drive_Delete" -#define ESC_SVM_DETACH "ESC_SVM_Detach" -#define ESC_SVM_DETACHING "ESC_SVM_Detaching" -#define ESC_SVM_ATTACH "ESC_SVM_Attach" -#define ESC_SVM_ATTACHING "ESC_SVM_Attaching" - -/* - * EC_SVM_STATE subclass definitions - supporting attributes (name/value pairs) - * are found in sys/sysevent/svm.h - */ -#define ESC_SVM_INIT_START "ESC_SVM_Init_Start" -#define ESC_SVM_INIT_FAILED "ESC_SVM_Init_Failed" -#define ESC_SVM_INIT_FATAL "ESC_SVM_Init_Fatal" -#define ESC_SVM_INIT_SUCCESS "ESC_SVM_Init_Success" -#define ESC_SVM_IOERR "ESC_SVM_Ioerr" -#define ESC_SVM_ERRED "ESC_SVM_Erred" -#define ESC_SVM_LASTERRED "ESC_SVM_Lasterred" -#define ESC_SVM_OK "ESC_SVM_Ok" -#define ESC_SVM_ENABLE "ESC_SVM_Enable" -#define ESC_SVM_RESYNC_START "ESC_SVM_Resync_Start" -#define ESC_SVM_RESYNC_FAILED "ESC_SVM_Resync_Failed" -#define ESC_SVM_RESYNC_SUCCESS "ESC_SVM_Resync_Success" -#define ESC_SVM_RESYNC_DONE "ESC_SVM_Resync_Done" -#define ESC_SVM_HOTSPARED "ESC_SVM_Hotspared" -#define ESC_SVM_HS_FREED "ESC_SVM_HS_Freed" -#define ESC_SVM_HS_CHANGED "ESC_SVM_HS_Changed" -#define ESC_SVM_TAKEOVER "ESC_SVM_Takeover" -#define ESC_SVM_RELEASE "ESC_SVM_Release" -#define ESC_SVM_OPEN_FAIL "ESC_SVM_Open_Fail" -#define ESC_SVM_OFFLINE "ESC_SVM_Offline" -#define ESC_SVM_ONLINE "ESC_SVM_Online" -#define ESC_SVM_CHANGE "ESC_SVM_Change" -#define ESC_SVM_EXCHANGE "ESC_SVM_Exchange" -#define ESC_SVM_REGEN_START "ESC_SVM_Regen_Start" -#define ESC_SVM_REGEN_DONE "ESC_SVM_Regen_Done" -#define ESC_SVM_REGEN_FAILED "ESC_SVM_Regen_Failed" - /* * EC_DR subclass definitions - supporting attributes (name/value pairs) * are found in sys/sysevent/dr.h diff --git a/usr/src/uts/common/sys/sysevent/svm.h b/usr/src/uts/common/sys/sysevent/svm.h deleted file mode 100644 index 8b6bb8058974..000000000000 --- a/usr/src/uts/common/sys/sysevent/svm.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2000-2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_SYSEVENT_SVM_H -#define _SYS_SYSEVENT_SVM_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * svm.h contains the publicly defined sysevent attribute names and values - * for all SVM type sysevents. Additions/removals/changes are subject to - * PSARC approval. - */ - -/* - * svm sysevent version - */ -#define SVM_VERSION0 0 -#define SVM_VERSION SVM_VERSION0 - -/* - * Event type EC_SVM_CONFIG/EC_SVM_STATE event schema - * Event Class - EC_SVM_CONFIG | EC_SVM_STATE - * Event Sub-Class - ESC_SVM_CREATE/ESC_SVM_DELETE/ESC_SVM_ADD/ - * ESC_SVM_REMOVE/ESC_SVM_REPLACE/ESC_SVM_GROW/ - * ESC_SVM_RENAME_SRC/ESC_SVM_RENAME_DST/ - * ESC_SVM_MEDIATOR_ADD/ESC_SVM_MEDIATOR_DELETE/ - * ESC_SVM_HOST_ADD/ESC_SVM_HOST_DELETE/ - * ESC_SVM_DRIVE_ADD/ESC_SVM_DRIVE_DELETE/ - * ESC_SVM_DETACH/ESC_SVM_DETACHING/ESC_SVM_ATTACH/ - * ESC_SVM_ATTACHING | - * ESC_SVM_INIT_START/ESC_SVM_INIT_FAILED/ - * ESC_SVM_INIT_FATAL/ESC_SVM_INIT_SUCCESS/ - * ESC_SVM_IOERR/ESC_SVM_ERRED/ESC_SVM_LASTERRED/ - * ESC_SVM_OK/ESC_SVM_ENABLE/ESC_SVM_RESYNC_START/ - * ESC_SVM_RESYNC_FAILED/ESC_SVM_RESYNC_SUCCESS/ - * ESC_SVM_RESYNC_DONE/ESC_SVM_HOTSPARED/ - * ESC_SVM_HS_FREED/ESC_SVM_HS_CHANGED/ - * ESC_SVM_TAKEOVER/ESC_SVM_RELEASE/ESC_SVM_OPEN_FAIL/ - * ESC_SVM_OFFLINE/ESC_SVM_ONLINE/ESC_SVM_CHANGE/ - * ESC_SVM_EXCHANGE/ESC_SVM_REGEN_START/ - * ESC_SVM_REGEN_DONE/ESC_SVM_REGEN_FAILED/ - * Attribute Name - SVM_TAG - * Attribute Type - SE_DATA_TYPE_UINT32 - * Attribute Value - [Device Tag]] - * Attribute Name - SVM_SET_NO - * Attribute Type - SE_DATA_TYPE_UINT32 - uint_t - * Attribute Value - [Device Set Number] - * Attribute Name - SVM_DEV_ID - * Attribute Type - SE_DATA_TYPE_UINT32 - ulong_t - * Attribute Value - [Device ID] - * Attribute Name - SVM_DEV_NAME - * Attribute Type - SE_DATA_TYPE_STRING - * Attribute Value - [Device Name] - */ -#define SVM_VERSION_NO "svm_version" /* event version number */ -#define SVM_TAG "svm_tag" /* device tag */ -#define SVM_SET_NO "svm_set_no" /* device set number */ -#define SVM_DEV_ID "svm_dev_id" /* device event occured on */ -#define SVM_DEV_NAME "svm_dev_name" /* device name */ - -/* - * sys event originator - */ -#define EP_SVM "svm" - -/* - * Device TAG definitions - */ -#define SVM_TAG_METADEVICE 1 -#define SVM_TAG_MIRROR 2 -#define SVM_TAG_STRIPE 3 -#define SVM_TAG_RAID5 4 -#define SVM_TAG_TRANS 5 -#define SVM_TAG_REPLICA 6 -#define SVM_TAG_HSP 7 -#define SVM_TAG_HS 8 -#define SVM_TAG_SET 9 -#define SVM_TAG_DRIVE 10 -#define SVM_TAG_HOST 11 -#define SVM_TAG_MEDIATOR 12 - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SYSEVENT_SVM_H */ diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h index 9880b7940648..59c3a55350bf 100644 --- a/usr/src/uts/common/sys/systm.h +++ b/usr/src/uts/common/sys/systm.h @@ -18,13 +18,14 @@ * * CDDL HEADER END */ + /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ #ifndef _SYS_SYSTM_H @@ -87,7 +88,6 @@ extern pgcnt_t freemem; /* Current free memory. */ extern dev_t rootdev; /* device of the root */ extern struct vnode *rootvp; /* vnode of root device */ -extern boolean_t root_is_svm; /* root is a mirrored device flag */ extern boolean_t root_is_ramdisk; /* root is boot_archive ramdisk */ extern uint32_t ramdisk_size; /* (KB) set only for sparc netboots */ extern char *volatile panicstr; /* panic string pointer */ diff --git a/usr/src/uts/common/sys/vfs.h b/usr/src/uts/common/sys/vfs.h index 07736d2b6961..eb3285302a5f 100644 --- a/usr/src/uts/common/sys/vfs.h +++ b/usr/src/uts/common/sys/vfs.h @@ -18,11 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Toomas Soome * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -470,7 +471,6 @@ void vn_reclaim(vnode_t *); void vn_invalid(vnode_t *); int rootconf(void); -int svm_rootconf(void); int domount(char *, struct mounta *, vnode_t *, struct cred *, struct vfs **); int dounmount(struct vfs *, int, cred_t *); diff --git a/usr/src/uts/i86pc/os/ddi_impl.c b/usr/src/uts/i86pc/os/ddi_impl.c index ff8935f3a9ca..84b41cfdadf2 100644 --- a/usr/src/uts/i86pc/os/ddi_impl.c +++ b/usr/src/uts/i86pc/os/ddi_impl.c @@ -23,6 +23,7 @@ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Garrett D'Amore * Copyright 2014 Pluribus Networks, Inc. + * Copyright 2016 Nexenta Systems, Inc. */ /* @@ -2572,13 +2573,6 @@ impl_setup_ddi(void) dev_t getrootdev(void) { - /* - * Precedence given to rootdev if set in /etc/system - */ - if (root_is_svm == B_TRUE) { - return (ddi_pathname_to_dev_t(svm_bootpath)); - } - /* * Usually rootfs.bo_name is initialized by the * the bootpath property from bootenv.rc, but diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 7967f919b6c1..8b430e7681dd 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -516,13 +516,6 @@ DRV_KMODS += dcam1394 DRV_KMODS += ib ibp eibnx eoib rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs DRV_KMODS += sol_umad -# -# LVM modules -# -DRV_KMODS += md -MISC_KMODS += md_stripe md_hotspares md_mirror md_raid md_trans md_notify -MISC_KMODS += md_sp - # # Brand modules # diff --git a/usr/src/uts/intel/md/Makefile b/usr/src/uts/intel/md/Makefile deleted file mode 100644 index 271c12b6eec2..000000000000 --- a/usr/src/uts/intel/md/Makefile +++ /dev/null @@ -1,111 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md driver -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md -OBJECTS = $(MD_OBJS:%=$(OBJS_DIR)/%) -OBJECTS += $(MD_COMMON_OBJS:%=$(OBJS_DIR)/%) -OBJECTS += $(MD_DERIVED_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(MD_OBJS:%.o=$(LINTS_DIR)/%.ln) -LINTS += $(MD_COMMON_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) -CONF_SRCDIR = $(UTSBASE)/common/io/lvm/md - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-variable -CERRWARN += -_gcc=-Wno-unused-function -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# Define targets -# -ALL_TARGET = $(BINARY) $(SRC_CONFILE) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) - -# -# Depends on rpcmod -# -LDFLAGS += -dy -N strmod/rpcmod - -# -# Default build targets. -# -.KEEP_STATE: - -def: derived_h .WAIT $(DEF_DEPS) - -all: derived_h .WAIT $(ALL_DEPS) - -clean: derived_h .WAIT $(CLEAN_DEPS) - -clobber: derived_h .WAIT $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: derived_h .WAIT $(MODLINTLIB_DEPS) - -clean.lint: derived_h .WAIT $(CLEAN_LINT_DEPS) - -install: derived_h .WAIT $(INSTALL_DEPS) - -derived_h: - cd $(UTSBASE)/common/sys/lvm; pwd; $(MAKE) $(TARGET) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_hotspares/Makefile b/usr/src/uts/intel/md_hotspares/Makefile deleted file mode 100644 index a0ae47713015..000000000000 --- a/usr/src/uts/intel/md_hotspares/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_hotspares/Makefile -# Copyright (c) 1999, 2000 by Sun Microsystems, Inc. -# All rights reserved. -# Copyright (c) 2011 Bayard G. Bell. -# -# This makefile drives the production of the md_hotspares module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_hotspares -OBJECTS = $(HOTSPARES_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(HOTSPARES_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Depends on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_mirror/Makefile b/usr/src/uts/intel/md_mirror/Makefile deleted file mode 100644 index 0015ca055d35..000000000000 --- a/usr/src/uts/intel/md_mirror/Makefile +++ /dev/null @@ -1,100 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md_mirror module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_mirror -OBJECTS = $(MIRROR_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(MIRROR_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_notify/Makefile b/usr/src/uts/intel/md_notify/Makefile deleted file mode 100644 index 04ad80b965fb..000000000000 --- a/usr/src/uts/intel/md_notify/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_notify/Makefile -# Copyright (c) 1999, 2000 by Sun Microsystems, Inc. -# All rights reserved. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the lvm notify module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_notify -OBJECTS = $(NOTIFY_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(NOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Depends on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_raid/Makefile b/usr/src/uts/intel/md_raid/Makefile deleted file mode 100644 index 7cbdee5d89ec..000000000000 --- a/usr/src/uts/intel/md_raid/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_raid/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md_raid module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_raid -OBJECTS = $(RAID_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(RAID_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Depends on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_sp/Makefile b/usr/src/uts/intel/md_sp/Makefile deleted file mode 100644 index 4dfbcd37473f..000000000000 --- a/usr/src/uts/intel/md_sp/Makefile +++ /dev/null @@ -1,99 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_sp/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md_sp module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_sp -OBJECTS = $(SOFTPART_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(SOFTPART_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW - -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Depends on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_stripe/Makefile b/usr/src/uts/intel/md_stripe/Makefile deleted file mode 100644 index b63e96c0d071..000000000000 --- a/usr/src/uts/intel/md_stripe/Makefile +++ /dev/null @@ -1,102 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_stripe/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md_stripe module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_stripe -OBJECTS = $(STRIPE_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(STRIPE_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# Depends on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/md_trans/Makefile b/usr/src/uts/intel/md_trans/Makefile deleted file mode 100644 index 83dd631aad2a..000000000000 --- a/usr/src/uts/intel/md_trans/Makefile +++ /dev/null @@ -1,97 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/md_trans/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the md_trans module -# -# architecture independent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_trans -OBJECTS = $(TRANS_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(TRANS_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# Depends on md & ufs -# -LDFLAGS += -dy -N drv/md -N fs/ufs - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/sys/bootconf.h b/usr/src/uts/intel/sys/bootconf.h index 4f6c48c3950b..5d21143d3897 100644 --- a/usr/src/uts/intel/sys/bootconf.h +++ b/usr/src/uts/intel/sys/bootconf.h @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ #ifndef _SYS_BOOTCONF_H @@ -194,7 +195,6 @@ extern struct bootobj rootfs; extern struct bootobj swapfile; extern char obp_bootpath[BO_MAXOBJNAME]; -extern char svm_bootpath[BO_MAXOBJNAME]; extern void *gfx_devinfo_list; diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc index abea00c62561..60e2a3a34e9d 100644 --- a/usr/src/uts/sparc/Makefile.sparc +++ b/usr/src/uts/sparc/Makefile.sparc @@ -19,12 +19,13 @@ # CDDL HEADER END # +# # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Andrew Stormont. All rights reserved. # Copyright (c) 2015, Joyent, Inc. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Gary Mills - +# Copyright 2016 Nexenta Systems, Inc. +# # # This makefile contains the common definitions for all sparc @@ -217,7 +218,7 @@ DRV_KMODS += ipsecesp iptun iwscn keysock kmdb kstat ksyms llc1 DRV_KMODS += lofi DRV_KMODS += log logindmux kssl mm nca physmem pm poll pool DRV_KMODS += pseudo ptc ptm pts ptsl ramdisk random rsm rts sad -DRV_KMODS += simnet softmac sppp sppptun sy sysevent sysmsg +DRV_KMODS += simnet softmac sppp sppptun sy sysevent sysmsg DRV_KMODS += spdsock DRV_KMODS += tcp tcp6 timerfd tl tnf ttymux udp udp6 wc winlock zcons DRV_KMODS += ippctl @@ -313,7 +314,7 @@ DRV_KMODS += qlt DRV_KMODS += iscsit DRV_KMODS += pppt DRV_KMODS += ncall nsctl sdbc nskern sv -DRV_KMODS += ii rdc rdcsrv rdcstub +DRV_KMODS += ii rdc rdcsrv rdcstub DRV_KMODS += iscsi DRV_KMODS += emlxs DRV_KMODS += oce @@ -336,12 +337,6 @@ DRV_KMODS += pcs MISC_KMODS += busra cardbus dada pcmcia DRV_KMODS += pcic -# Add lvm -# -DRV_KMODS += md -MISC_KMODS += md_mirror md_stripe md_hotspares md_raid md_trans md_notify -MISC_KMODS += md_sp - # # Exec Class Modules (/kernel/exec): # @@ -405,7 +400,7 @@ MISC_KMODS += idm MISC_KMODS += idmap MISC_KMODS += hook MISC_KMODS += neti -MISC_KMODS += ctf +MISC_KMODS += ctf MISC_KMODS += mac dls MISC_KMODS += cmlb MISC_KMODS += tem diff --git a/usr/src/uts/sparc/md/Makefile b/usr/src/uts/sparc/md/Makefile deleted file mode 100644 index 9dcd315e9f67..000000000000 --- a/usr/src/uts/sparc/md/Makefile +++ /dev/null @@ -1,114 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's md driver module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md -OBJECTS = $(MD_OBJS:%=$(OBJS_DIR)/%) -OBJECTS += $(MD_COMMON_OBJS:%=$(OBJS_DIR)/%) -OBJECTS += $(MD_DERIVED_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(MD_OBJS:%.o=$(LINTS_DIR)/%.ln) -LINTS += $(MD_COMMON_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) -CONF_SRCDIR = $(UTSBASE)/common/io/lvm/md - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-variable -CERRWARN += -_gcc=-Wno-unused-function -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# Define dependency on rpcmod -# -LDFLAGS += -dy -N strmod/rpcmod - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: derived_h .WAIT $(DEF_DEPS) - -all: derived_h .WAIT $(ALL_DEPS) - -clean: derived_h .WAIT $(CLEAN_DEPS) - -clobber: derived_h .WAIT $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: derived_h .WAIT $(MODLINTLIB_DEPS) - -clean.lint: derived_h .WAIT $(CLEAN_LINT_DEPS) - -install: derived_h .WAIT $(INSTALL_DEPS) - -derived_h: - cd $(UTSBASE)/common/sys/lvm; pwd; $(MAKE) $(TARGET) -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_hotspares/Makefile b/usr/src/uts/sparc/md_hotspares/Makefile deleted file mode 100644 index 80cddd6b0dc5..000000000000 --- a/usr/src/uts/sparc/md_hotspares/Makefile +++ /dev/null @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/md_hotspares/Makefile -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's hotspares misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_hotspares -OBJECTS = $(HOTSPARES_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(HOTSPARES_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_mirror/Makefile b/usr/src/uts/sparc/md_mirror/Makefile deleted file mode 100644 index 5a5235521989..000000000000 --- a/usr/src/uts/sparc/md_mirror/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/md_mirror/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's mirror misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_mirror -OBJECTS = $(MIRROR_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(MIRROR_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_notify/Makefile b/usr/src/uts/sparc/md_notify/Makefile deleted file mode 100644 index 19730bb4f615..000000000000 --- a/usr/src/uts/sparc/md_notify/Makefile +++ /dev/null @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/notify/Makefile -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's notify misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_notify -OBJECTS = $(NOTIFY_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(NOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_raid/Makefile b/usr/src/uts/sparc/md_raid/Makefile deleted file mode 100644 index 1cf5aba4c961..000000000000 --- a/usr/src/uts/sparc/md_raid/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/raid/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's raid misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_raid -OBJECTS = $(RAID_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(RAID_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_sp/Makefile b/usr/src/uts/sparc/md_sp/Makefile deleted file mode 100644 index 251a6b08a0db..000000000000 --- a/usr/src/uts/sparc/md_sp/Makefile +++ /dev/null @@ -1,99 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/md_sp/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the soft partitioning module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_sp -OBJECTS = $(SOFTPART_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(SOFTPART_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-uninitialized - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_stripe/Makefile b/usr/src/uts/sparc/md_stripe/Makefile deleted file mode 100644 index 88f196844088..000000000000 --- a/usr/src/uts/sparc/md_stripe/Makefile +++ /dev/null @@ -1,107 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/stripe/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's stripe misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_stripe -OBJECTS = $(STRIPE_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(STRIPE_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependency on md -# -LDFLAGS += -dy -N drv/md - -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/md_trans/Makefile b/usr/src/uts/sparc/md_trans/Makefile deleted file mode 100644 index 77f5aaf6a729..000000000000 --- a/usr/src/uts/sparc/md_trans/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/md_trans/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# Copyright (c) 2011 Bayard G. Bell. All rights reserved. -# -# This makefile drives the production of the SLVM's trans misc module. -# -# sparc implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = md_trans -OBJECTS = $(TRANS_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(TRANS_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Overrides. -# -CFLAGS += $(CCVERBOSE) - -# -# Define dependencies on md and specfs -# -LDFLAGS += -dy -N drv/md -N fs/ufs - -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-parentheses - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index 1028b2626cbe..845705dba88f 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -18,9 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. */ #if !defined(lint) @@ -621,7 +623,6 @@ stubs_base: #ifndef UFS_MODULE MODULE(ufs,fs); STUB(ufs, quotactl, nomod_minus_one); - STUB(ufs, ufs_remountroot, 0); END_MODULE(ufs); #endif @@ -731,7 +732,6 @@ stubs_base: #ifndef SWAPGENERIC_MODULE MODULE(swapgeneric,misc); STUB(swapgeneric, rootconf, 0); - STUB(swapgeneric, svm_rootconf, 0); STUB(swapgeneric, getrootdev, 0); STUB(swapgeneric, getfsname, 0); STUB(swapgeneric, loadrootmodules, 0); diff --git a/usr/src/uts/sun/sys/bootconf.h b/usr/src/uts/sun/sys/bootconf.h index 1377b4a139ef..3fe17c419852 100644 --- a/usr/src/uts/sun/sys/bootconf.h +++ b/usr/src/uts/sun/sys/bootconf.h @@ -18,9 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. */ #ifndef _SYS_BOOTCONF_H @@ -198,7 +200,6 @@ extern struct bootobj rootfs; extern struct bootobj swapfile; extern char obp_bootpath[BO_MAXOBJNAME]; -extern char svm_bootpath[BO_MAXOBJNAME]; extern dev_t getrootdev(void); extern void getfsname(char *, char *, size_t);