/*
* Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
*
* This code is derived from software contributed to The DragonFly Project
* by Matthew Dillon <dillon@backplane.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of The DragonFly Project nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific, prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.130 2008/11/13 02:18:43 dillon Exp $
*/
/*
* This header file contains structures used internally by the HAMMERFS
* implementation. See hammer_disk.h for on-disk structures.
*/
#ifndef _HAMMER_H
#define _HAMMER_H
#include <linux/buffer_head.h> // for sb_bread
#include "dfly_wrap.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/tree.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/stat.h>
#include <sys/globaldata.h>
#include <sys/lockf.h>
#include <sys/buf.h>
#include <sys/queue.h>
#include <sys/ktr.h>
#include <sys/buf2.h>
#include <sys/signal2.h>
#include <vfs/hammer/hammer_disk.h>
#include <vfs/hammer/hammer_mount.h>
#include <vfs/hammer/hammer_ioctl.h>
#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
MALLOC_DECLARE(M_HAMMER);
/*
* Kernel trace
*/
#if !defined(KTR_HAMMER)
#define KTR_HAMMER KTR_ALL
#endif
KTR_INFO_MASTER_EXTERN(hammer);
/*
* Misc structures
*/
struct hammer_mount;
/*
* Key structure used for custom RB tree inode lookups. This prototypes
* the function hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).
*/
typedef struct hammer_inode_info {
int64_t obj_id; /* (key) object identifier */
hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
u_int32_t obj_localization; /* (key) pseudo-fs */
union {
struct hammer_btree_leaf_elm *leaf;
} u;
} *hammer_inode_info_t;
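/*
* Illustrative sketch (not part of the original header): filling in the
* lookup key before handing it to the generated RB-tree lookup. The
* helper name is hypothetical; the real callers populate the structure
* directly.
*/
static __inline void
hammer_inode_info_setup(hammer_inode_info_t iinfo, int64_t obj_id,
hammer_tid_t asof, u_int32_t localization)
{
iinfo->obj_id = obj_id; /* object being looked up */
iinfo->obj_asof = asof; /* snapshot transid, or 0 for head */
iinfo->obj_localization = localization; /* pseudo-fs key */
iinfo->u.leaf = NULL; /* no leaf hint */
}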
typedef enum hammer_transaction_type {
HAMMER_TRANS_RO,
HAMMER_TRANS_STD,
HAMMER_TRANS_FLS
} hammer_transaction_type_t;
/*
* HAMMER Transaction tracking
*/
struct hammer_transaction {
hammer_transaction_type_t type;
struct hammer_mount *hmp;
hammer_tid_t tid;
u_int64_t time;
u_int32_t time32;
int sync_lock_refs;
int flags;
struct hammer_volume *rootvol;
};
typedef struct hammer_transaction *hammer_transaction_t;
#define HAMMER_TRANSF_NEWINODE 0x0001
#define HAMMER_TRANSF_DIDIO 0x0002
/*
* HAMMER locks
*/
struct hammer_lock {
int refs; /* active references delay writes */
int lockcount; /* lock count for exclusive/shared access */
int wanted; /* someone is waiting on the lock */
int exwanted; /* number of threads waiting for ex lock */
struct thread *locktd;
};
static __inline int
hammer_islocked(struct hammer_lock *lock)
{
return(lock->lockcount != 0);
}
static __inline int
hammer_isactive(struct hammer_lock *lock)
{
return(lock->refs != 0);
}
static __inline int
hammer_islastref(struct hammer_lock *lock)
{
return(lock->refs == 1);
}
/*
* Return if we specifically own the lock exclusively.
*/
static __inline int
hammer_lock_excl_owned(struct hammer_lock *lock, thread_t td)
{
if (lock->lockcount > 0 && lock->locktd == td)
return(1);
return(0);
}
/*
* Flush state, used by various structures
*/
typedef enum hammer_inode_state {
HAMMER_FST_IDLE,
HAMMER_FST_SETUP,
HAMMER_FST_FLUSH
} hammer_inode_state_t;
TAILQ_HEAD(hammer_record_list, hammer_record);
/*
* Pseudo-filesystem extended data tracking
*/
struct hammer_pfs_rb_tree;
struct hammer_pseudofs_inmem;
RB_HEAD(hammer_pfs_rb_tree, hammer_pseudofs_inmem);
RB_PROTOTYPE2(hammer_pfs_rb_tree, hammer_pseudofs_inmem, rb_node,
hammer_pfs_rb_compare, u_int32_t);
struct hammer_pseudofs_inmem {
RB_ENTRY(hammer_pseudofs_inmem) rb_node;
struct hammer_lock lock;
u_int32_t localization;
hammer_tid_t create_tid;
int flags;
udev_t fsid_udev;
struct hammer_pseudofs_data pfsd;
};
typedef struct hammer_pseudofs_inmem *hammer_pseudofs_inmem_t;
#define HAMMER_PFSM_DELETED 0x0001
/*
* Cache object ids. A fixed number of objid cache structures are
* created to reserve object ids for newly created files in multiples
* of 100,000, localized to a particular directory, and recycled as
* needed. This allows parallel create operations in different
* directories to retain fairly localized object ids which in turn
* improves reblocking performance and layout.
*/
#define OBJID_CACHE_SIZE 1024
#define OBJID_CACHE_BULK 100000
typedef struct hammer_objid_cache {
TAILQ_ENTRY(hammer_objid_cache) entry;
struct hammer_inode *dip;
hammer_tid_t next_tid;
int count;
} *hammer_objid_cache_t;
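/*
* Illustrative sketch (not part of the original header): consuming ids
* from a cache entry's reserved block of OBJID_CACHE_BULK ids. The
* helper name is hypothetical; hammer_alloc_objid(), declared further
* below, is the real interface and also refills exhausted entries.
*/
static __inline hammer_tid_t
hammer_objid_cache_get(hammer_objid_cache_t cache)
{
--cache->count; /* one fewer reserved id remaining */
return(cache->next_tid++); /* hand out the next localized id */
}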
/*
* Associate an inode with a B-Tree node to cache search start positions
*/
typedef struct hammer_node_cache {
TAILQ_ENTRY(hammer_node_cache) entry;
struct hammer_node *node;
struct hammer_inode *ip;
} *hammer_node_cache_t;
TAILQ_HEAD(hammer_node_cache_list, hammer_node_cache);
/*
* Structure used to organize flush groups. Flush groups must be
* organized into chunks in order to avoid blowing out the UNDO FIFO.
* Without this a 'sync' could end up flushing 50,000 inodes in a single
* transaction.
*/
struct hammer_flush_group {
TAILQ_ENTRY(hammer_flush_group) flush_entry;
TAILQ_HEAD(, hammer_inode) flush_list;
int inode_count; /* inode load */
int total_count; /* record load */
int running; /* group is running */
int closed; /* no longer accepting new inodes */
int refs;
};
typedef struct hammer_flush_group *hammer_flush_group_t;
TAILQ_HEAD(hammer_flush_group_list, hammer_flush_group);
/*
* Structure used to represent an inode in-memory.
*
* The record and data associated with an inode may be out of sync with
* the disk (xDIRTY flags), or not even on the disk at all (ONDISK flag
* clear).
*
* An inode may also hold a cache of unsynchronized records, used for
* databases and directories only. Unsynchronized regular file data is
* stored in the buffer cache.
*
* NOTE: A file which is created and destroyed within the initial
* synchronization period can wind up not doing any disk I/O at all.
*
* Finally, an inode may cache numerous disk-referencing B-Tree cursors.
*/
struct hammer_ino_rb_tree;
struct hammer_inode;
RB_HEAD(hammer_ino_rb_tree, hammer_inode);
RB_PROTOTYPEX(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
hammer_ino_rb_compare, hammer_inode_info_t);
struct hammer_rec_rb_tree;
struct hammer_record;
RB_HEAD(hammer_rec_rb_tree, hammer_record);
RB_PROTOTYPEX(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
hammer_rec_rb_compare, hammer_btree_leaf_elm_t);
TAILQ_HEAD(hammer_node_list, hammer_node);
struct hammer_inode {
RB_ENTRY(hammer_inode) rb_node;
hammer_inode_state_t flush_state;
hammer_flush_group_t flush_group;
TAILQ_ENTRY(hammer_inode) flush_entry;
struct hammer_record_list target_list; /* target of dependent recs */
int64_t obj_id; /* (key) object identifier */
hammer_tid_t obj_asof; /* (key) snapshot or 0 */
u_int32_t obj_localization; /* (key) pseudo-fs */
struct hammer_mount *hmp;
hammer_objid_cache_t objid_cache;
int flags;
int error; /* flush error */
int cursor_ip_refs; /* sanity */
int rsv_recs;
struct vnode *vp;
hammer_pseudofs_inmem_t pfsm;
struct lockf advlock;
struct hammer_lock lock; /* sync copy interlock */
off_t trunc_off;
struct hammer_btree_leaf_elm ino_leaf; /* in-memory cache */
struct hammer_inode_data ino_data; /* in-memory cache */
struct hammer_rec_rb_tree rec_tree; /* in-memory cache */
struct hammer_node_cache cache[2]; /* search initiate cache */
/*
* When a demark is created to synchronize an inode to
* disk, certain fields are copied so the front-end VOPs
* can continue to run in parallel with the synchronization
* occurring in the background.
*/
int sync_flags; /* to-sync flags cache */
off_t sync_trunc_off; /* to-sync truncation */
off_t save_trunc_off; /* write optimization */
struct hammer_btree_leaf_elm sync_ino_leaf; /* to-sync cache */
struct hammer_inode_data sync_ino_data; /* to-sync cache */
};
typedef struct hammer_inode *hammer_inode_t;
#define VTOI(vp) ((struct hammer_inode *)(vp)->v_data)
#define HAMMER_INODE_DDIRTY 0x0001 /* in-memory ino_data is dirty */
/* (not including atime/mtime) */
#define HAMMER_INODE_RSV_INODES 0x0002 /* hmp->rsv_inodes bumped */
#define HAMMER_INODE_CONN_DOWN 0x0004 /* include in downward recursion */
#define HAMMER_INODE_XDIRTY 0x0008 /* in-memory records */
#define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */
#define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */
#define HAMMER_INODE_DELETED 0x0080 /* inode delete (backend) */
#define HAMMER_INODE_DELONDISK 0x0100 /* delete synchronized to disk */
#define HAMMER_INODE_RO 0x0200 /* read-only (because of as-of) */
#define HAMMER_INODE_VHELD 0x0400 /* vnode held on sync */
#define HAMMER_INODE_DONDISK 0x0800 /* data records may be on disk */
#define HAMMER_INODE_BUFS 0x1000 /* dirty high level bps present */
#define HAMMER_INODE_REFLUSH 0x2000 /* flush on dependency / reflush */
#define HAMMER_INODE_RECLAIM 0x4000 /* trying to reclaim */
#define HAMMER_INODE_FLUSHW 0x8000 /* Someone waiting for flush */
#define HAMMER_INODE_TRUNCATED 0x00010000
#define HAMMER_INODE_DELETING 0x00020000 /* inode delete request (frontend)*/
#define HAMMER_INODE_RESIGNAL 0x00040000 /* re-signal on re-flush */
#define HAMMER_INODE_ATIME 0x00100000 /* in-memory atime modified */
#define HAMMER_INODE_MTIME 0x00200000 /* in-memory mtime modified */
#define HAMMER_INODE_WOULDBLOCK 0x00400000 /* re-issue to new flush group */
#define HAMMER_INODE_MODMASK (HAMMER_INODE_DDIRTY| \
HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS| \
HAMMER_INODE_ATIME|HAMMER_INODE_MTIME| \
HAMMER_INODE_TRUNCATED|HAMMER_INODE_DELETING)
#define HAMMER_INODE_MODMASK_NOXDIRTY \
(HAMMER_INODE_MODMASK & ~HAMMER_INODE_XDIRTY)
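/*
* Illustrative sketch (not part of the original header): the masks above
* are meant for tests of this form, deciding whether an inode still
* carries unsynchronized state. The helper name is hypothetical.
*/
static __inline int
hammer_inode_is_modified(hammer_inode_t ip)
{
return((ip->flags & HAMMER_INODE_MODMASK) != 0);
}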
#define HAMMER_FLUSH_GROUP_SIZE 64
#define HAMMER_FLUSH_SIGNAL 0x0001
#define HAMMER_FLUSH_RECURSION 0x0002
/*
* Used by the inode reclaim code to pipeline reclaims and avoid
* blowing out kernel memory or letting the flusher get too far
* behind. The reclaim wakes up when count reaches 0 or the
* timer expires.
*/
struct hammer_reclaim {
TAILQ_ENTRY(hammer_reclaim) entry;
int count;
};
#define HAMMER_RECLAIM_FLUSH 2000
#define HAMMER_RECLAIM_WAIT 4000
/*
* Structure used to represent an unsynchronized record in-memory. These
* records typically represent directory entries. Only non-historical
* records are kept in-memory.
*
* Records are organized as a per-inode RB-Tree. If the inode is not
* on disk then neither are any records and the in-memory record tree
* represents the entire contents of the inode. If the inode is on disk
* then the on-disk B-Tree is scanned in parallel with the in-memory
* RB-Tree to synthesize the current state of the file.
*
* Records are also used to enforce the ordering of directory create/delete
* operations. A new inode will not be flushed to disk unless its related
* directory entry is also being flushed at the same time. A directory entry
* will not be removed unless its related inode is also being removed at the
* same time.
*/
typedef enum hammer_record_type {
HAMMER_MEM_RECORD_GENERAL, /* misc record */
HAMMER_MEM_RECORD_INODE, /* inode record */
HAMMER_MEM_RECORD_ADD, /* positive memory cache record */
HAMMER_MEM_RECORD_DEL, /* negative delete-on-disk record */
HAMMER_MEM_RECORD_DATA /* bulk-data record w/on-disk ref */
} hammer_record_type_t;
struct hammer_record {
RB_ENTRY(hammer_record) rb_node;
TAILQ_ENTRY(hammer_record) target_entry;
hammer_inode_state_t flush_state;
hammer_flush_group_t flush_group;
hammer_record_type_t type;
struct hammer_lock lock;
struct hammer_reserve *resv;
struct hammer_inode *ip;
struct hammer_inode *target_ip;
struct hammer_btree_leaf_elm leaf;
union hammer_data_ondisk *data;
int flags;
hammer_off_t zone2_offset; /* direct-write only */
};
typedef struct hammer_record *hammer_record_t;
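/*
* Note (illustrative, not part of the original header): during iteration
* the in-memory rec_tree and the on-disk B-Tree are assumed to be walked
* side by side, yielding whichever element sorts first; a memory record
* such as a HAMMER_MEM_RECORD_DEL masks its on-disk counterpart, which is
* how a deletion becomes visible before it has been synchronized.
*/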
/*
* Record flags. Note that FE can only be set by the frontend if the
* record has not been interlocked by the backend w/ BE.
*/
#define HAMMER_RECF_ALLOCDATA 0x0001
#define HAMMER_RECF_ONRBTREE 0x0002
#define HAMMER_RECF_DELETED_FE 0x0004 /* deleted (frontend) */
#define HAMMER_RECF_DELETED_BE 0x0008 /* deleted (backend) */
#define HAMMER_RECF_COMMITTED 0x0010 /* committed to the B-Tree */
#define HAMMER_RECF_INTERLOCK_BE 0x0020 /* backend interlock */
#define HAMMER_RECF_WANTED 0x0040 /* wanted by the frontend */
#define HAMMER_RECF_CONVERT_DELETE 0x0100 /* special case */
#define HAMMER_RECF_DIRECT_IO 0x0200 /* related direct I/O running*/
#define HAMMER_RECF_DIRECT_WAIT 0x0400 /* waiting on related direct I/O */
#define HAMMER_RECF_DIRECT_INVAL 0x0800 /* buffer alias invalidation */
/*
* hammer_delete_at_cursor() flags
*/
#define HAMMER_DELETE_ADJUST 0x0001
#define HAMMER_DELETE_DESTROY 0x0002
/*
* In-memory structures representing on-disk structures.
*/
struct hammer_volume;
struct hammer_buffer;
struct hammer_node;
struct hammer_undo;
struct hammer_reserve;
RB_HEAD(hammer_vol_rb_tree, hammer_volume);
RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
RB_HEAD(hammer_nod_rb_tree, hammer_node);
RB_HEAD(hammer_und_rb_tree, hammer_undo);
RB_HEAD(hammer_res_rb_tree, hammer_reserve);
RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
hammer_vol_rb_compare, int32_t);
RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
hammer_buf_rb_compare, hammer_off_t);
RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
hammer_nod_rb_compare, hammer_off_t);
RB_PROTOTYPE2(hammer_und_rb_tree, hammer_undo, rb_node,
hammer_und_rb_compare, hammer_off_t);
RB_PROTOTYPE2(hammer_res_rb_tree, hammer_reserve, rb_node,
hammer_res_rb_compare, hammer_off_t);
/*
* IO management - embedded at the head of various in-memory structures
*
* VOLUME - hammer_volume containing meta-data
* META_BUFFER - hammer_buffer containing meta-data
* DATA_BUFFER - hammer_buffer containing pure-data
*
* Dirty volume headers and dirty meta-data buffers are locked until the
* flusher can sequence them out. Dirty pure-data buffers can be written.
* Clean buffers can be passively released.
*/
typedef enum hammer_io_type {
HAMMER_STRUCTURE_VOLUME,
HAMMER_STRUCTURE_META_BUFFER,
HAMMER_STRUCTURE_UNDO_BUFFER,
HAMMER_STRUCTURE_DATA_BUFFER
} hammer_io_type_t;
union hammer_io_structure;
struct hammer_io;
struct worklist {
LIST_ENTRY(worklist) node;
};
TAILQ_HEAD(hammer_io_list, hammer_io);
typedef struct hammer_io_list *hammer_io_list_t;
struct hammer_io {
struct worklist worklist;
struct hammer_lock lock;
enum hammer_io_type type;
struct hammer_mount *hmp;
struct hammer_volume *volume;
TAILQ_ENTRY(hammer_io) mod_entry; /* list entry if modified */
hammer_io_list_t mod_list;
struct buf *bp;
int64_t offset; /* zone-2 offset */
int bytes; /* buffer cache buffer size */
int loading; /* loading/unloading interlock */
int modify_refs;
u_int modified : 1; /* bp's data was modified */
u_int released : 1; /* bp released (w/ B_LOCKED set) */
u_int running : 1; /* bp write IO in progress */
u_int waiting : 1; /* someone is waiting on us */
u_int validated : 1; /* ondisk has been validated */
u_int waitdep : 1; /* flush waits for dependencies */
u_int recovered : 1; /* has recovery ref */
u_int waitmod : 1; /* waiting for modify_refs */
u_int reclaim : 1; /* reclaim requested */
u_int gencrc : 1; /* crc needs to be generated */
u_int ioerror : 1; /* abort on io-error */
};
typedef struct hammer_io *hammer_io_t;
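/*
* Illustrative sketch (not part of the original header): a passive test
* of an io structure's write state built from the bit fields above. The
* helper name is hypothetical.
*/
static __inline int
hammer_io_is_busy(hammer_io_t io)
{
/* dirty and not yet queued, or a write is in flight */
return(io->modified || io->running);
}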
#define HAMMER_CLUSTER_SIZE (64 * 1024)
#if HAMMER_CLUSTER_SIZE > MAXBSIZE
#undef HAMMER_CLUSTER_SIZE
#define HAMMER_CLUSTER_SIZE MAXBSIZE
#endif
#define HAMMER_CLUSTER_BUFS (HAMMER_CLUSTER_SIZE / HAMMER_BUFSIZE)
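/*
* Note (illustrative, not part of the original header): with the usual
* 16KB HAMMER_BUFSIZE this works out to 4 buffers per 64KB cluster,
* assuming MAXBSIZE did not clamp HAMMER_CLUSTER_SIZE above.
*/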
/*
* In-memory volume representing on-disk buffer
*/
struct hammer_volume {
struct hammer_io io;
RB_ENTRY(hammer_volume) rb_node;
struct hammer_volume_ondisk *ondisk;
int32_t vol_no;
int64_t nblocks; /* note: special calculation for statfs */
int64_t buffer_base; /* base offset of buffer 0 */
hammer_off_t maxbuf_off; /* Maximum buffer offset (zone-2) */
hammer_off_t maxraw_off; /* Maximum raw offset for device */
char *vol_name;
struct super_block *sb;
struct vnode *devvp;
int vol_flags;
};
typedef struct hammer_volume *hammer_volume_t;
/*
* In-memory buffer (other than volume, super-cluster, or cluster),
* representing an on-disk buffer.
*/
struct hammer_buffer {
struct hammer_io io;
RB_ENTRY(hammer_buffer) rb_node;
void *ondisk;
hammer_off_t zoneX_offset;
hammer_off_t zone2_offset;
struct hammer_reserve *resv;
struct hammer_node_list clist;
};
typedef struct hammer_buffer *hammer_buffer_t;
/*
* In-memory B-Tree node, representing an on-disk B-Tree node.
*
* This is a hang-on structure which is backed by a hammer_buffer,
* indexed by a hammer_cluster, and used for fine-grained locking of
* B-Tree nodes in order to properly control lock ordering. A hammer_buffer
* can contain multiple nodes representing wildly disassociated portions
* of the B-Tree so locking cannot be done on a buffer-by-buffer basis.
*
* This structure uses a cluster-relative index to reduce the number
* of layers required to access it, and also because all on-disk B-Tree
* references are cluster-relative offsets.
*/
struct hammer_node {
struct hammer_lock lock; /* node-by-node lock */
TAILQ_ENTRY(hammer_node) entry; /* per-buffer linkage */
RB_ENTRY(hammer_node) rb_node; /* per-cluster linkage */
hammer_off_t node_offset; /* full offset spec */
struct hammer_mount *hmp;
struct hammer_buffer *buffer; /* backing buffer */
hammer_node_ondisk_t ondisk; /* ptr to on-disk structure */
TAILQ_HEAD(, hammer_cursor) cursor_list; /* deadlock recovery */
struct hammer_node_cache_list cache_list; /* passive caches */
int flags;
int loading; /* load interlock */
};
#define HAMMER_NODE_DELETED 0x0001
#define HAMMER_NODE_FLUSH 0x0002
#define HAMMER_NODE_CRCGOOD 0x0004
#define HAMMER_NODE_NEEDSCRC 0x0008
#define HAMMER_NODE_NEEDSMIRROR 0x0010
typedef struct hammer_node *hammer_node_t;
/*
* List of locked nodes.
*/
struct hammer_node_locklist {
struct hammer_node_locklist *next;
hammer_node_t node;
};
typedef struct hammer_node_locklist *hammer_node_locklist_t;
/*
* Common I/O management structure - embedded in in-memory structures
* which are backed by filesystem buffers.
*/
union hammer_io_structure {
struct hammer_io io;
struct hammer_volume volume;
struct hammer_buffer buffer;
};
typedef union hammer_io_structure *hammer_io_structure_t;
/*
* The reserve structure prevents the blockmap from allocating
* out of a reserved bigblock. Such reservations are used by
* the direct-write mechanism.
*
* The structure is also used to hold off on reallocations of
* big blocks from the freemap until flush dependencies have
* been dealt with.
*/
struct hammer_reserve {
RB_ENTRY(hammer_reserve) rb_node;
TAILQ_ENTRY(hammer_reserve) delay_entry;
int flush_group;
int flags;
int refs;
int zone;
int append_off;
hammer_off_t zone_offset;
};
typedef struct hammer_reserve *hammer_reserve_t;
#define HAMMER_RESF_ONDELAY 0x0001
#define HAMMER_RESF_LAYER2FREE 0x0002
#include <vfs/hammer/hammer_cursor.h>
/*
* The undo structure tracks recent undos to avoid laying down duplicate
* undos within a flush group, saving us a significant amount of overhead.
*
* This is strictly a heuristic.
*/
#define HAMMER_MAX_UNDOS 1024
#define HAMMER_MAX_FLUSHERS 4
struct hammer_undo {
RB_ENTRY(hammer_undo) rb_node;
TAILQ_ENTRY(hammer_undo) lru_entry;
hammer_off_t offset;
int bytes;
};
typedef struct hammer_undo *hammer_undo_t;
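/*
* Illustrative sketch (not part of the original header): the assumed
* usage pattern for the undo history, with hammer_enter_undo_history()
* (declared further below) returning EALREADY when an undo covering the
* same (offset, bytes) range was laid down recently:
*
* if (hammer_enter_undo_history(hmp, offset, bytes) != EALREADY)
* generate a fresh undo record for the range;
*/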
struct hammer_flusher_info;
TAILQ_HEAD(hammer_flusher_info_list, hammer_flusher_info);
struct hammer_flusher {
int signal; /* flusher thread sequencer */
int act; /* currently active flush group */
int done; /* set to act when complete */
int next; /* next flush group */
int group_lock; /* lock sequencing of the next flush */
int exiting; /* request master exit */
thread_t td; /* master flusher thread */
hammer_tid_t tid; /* last flushed transaction id */
int finalize_want; /* serialize finalization */
struct hammer_lock finalize_lock; /* serialize finalization */
struct hammer_transaction trans; /* shared transaction */
struct hammer_flusher_info_list run_list;
struct hammer_flusher_info_list ready_list;
};
/*
* Internal hammer mount data structure
*/
struct hammer_mount {
struct mount *mp;
/*struct vnode *rootvp;*/
struct hammer_ino_rb_tree rb_inos_root;
struct hammer_vol_rb_tree rb_vols_root;
struct hammer_nod_rb_tree rb_nods_root;
struct hammer_und_rb_tree rb_undo_root;
struct hammer_res_rb_tree rb_resv_root;
struct hammer_buf_rb_tree rb_bufs_root;
struct hammer_pfs_rb_tree rb_pfsm_root;
struct hammer_volume *rootvol;
struct hammer_base_elm root_btree_beg;
struct hammer_base_elm root_btree_end;
struct malloc_type *m_misc;
struct malloc_type *m_inodes;
int flags; /* HAMMER_MOUNT_xxx flags */
int hflags;
int ronly;
int nvolumes;
int volume_iterator;
int master_id; /* -1 or 0-15 - clustering and mirroring */
int version; /* hammer filesystem version to use */
int rsv_inodes; /* reserved space due to dirty inodes */
int64_t rsv_databytes; /* reserved space due to record data */
int rsv_recs; /* reserved space due to dirty records */
int rsv_fromdelay; /* bigblocks reserved due to flush delay */
int undo_rec_limit; /* based on size of undo area */
int last_newrecords;
int count_newrecords;
int inode_reclaims; /* inodes pending reclaim by flusher */
int count_inodes; /* total number of inodes */
int count_iqueued; /* inodes queued to flusher */
struct hammer_flusher flusher;
u_int check_interrupt;
uuid_t fsid;
struct hammer_io_list volu_list; /* dirty volume buffers */
struct hammer_io_list undo_list; /* dirty undo buffers */
struct hammer_io_list data_list; /* dirty data buffers */
struct hammer_io_list alt_data_list; /* dirty data buffers */
struct hammer_io_list meta_list; /* dirty meta bufs */
struct hammer_io_list lose_list; /* loose buffers */
int locked_dirty_space; /* meta/volu count */
int io_running_space;
int objid_cache_count;
int error; /* critical I/O error */
struct krate krate; /* rate limited kprintf */
hammer_tid_t asof; /* snapshot mount */
hammer_tid_t next_tid;
hammer_tid_t flush_tid1; /* flusher tid sequencing */
hammer_tid_t flush_tid2; /* flusher tid sequencing */
int64_t copy_stat_freebigblocks; /* number of free bigblocks */
struct netexport export;
struct hammer_lock sync_lock;
struct hammer_lock free_lock;
struct hammer_lock undo_lock;
struct hammer_lock blkmap_lock;
struct hammer_blockmap blockmap[HAMMER_MAX_ZONES];
struct hammer_undo undos[HAMMER_MAX_UNDOS];
int undo_alloc;
TAILQ_HEAD(, hammer_undo) undo_lru_list;
TAILQ_HEAD(, hammer_reserve) delay_list;
struct hammer_flush_group_list flush_group_list;
hammer_flush_group_t next_flush_group;
TAILQ_HEAD(, hammer_objid_cache) objid_cache_list;
TAILQ_HEAD(, hammer_reclaim) reclaim_list;
};
typedef struct hammer_mount *hammer_mount_t;
#define HAMMER_MOUNT_CRITICAL_ERROR 0x0001
#define HAMMER_MOUNT_FLUSH_RECOVERY 0x0002
struct hammer_sync_info {
int error;
int waitfor;
};
#endif
/*
* checkspace slop (8MB chunks), higher numbers are more conservative.
*/
#define HAMMER_CHKSPC_REBLOCK 25
#define HAMMER_CHKSPC_MIRROR 20
#define HAMMER_CHKSPC_WRITE 20
#define HAMMER_CHKSPC_CREATE 20
#define HAMMER_CHKSPC_REMOVE 10
#define HAMMER_CHKSPC_EMERGENCY 0
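/*
* Illustrative sketch (not part of the original header): how a caller
* might apply a slop value, each unit standing for an 8MB chunk held in
* reserve. The helper and the freebytes computation are hypothetical;
* the real check lives in the HAMMER support code.
*/
static __inline int
hammer_checkspace_sketch(int64_t freebytes, int slop)
{
return(freebytes >= (int64_t)slop * 8 * 1024 * 1024);
}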
#if defined(_KERNEL)
extern struct vop_ops hammer_vnode_vops;
extern struct vop_ops hammer_spec_vops;
extern struct vop_ops hammer_fifo_vops;
extern struct bio_ops hammer_bioops;
extern int hammer_debug_io;
extern int hammer_debug_general;
extern int hammer_debug_debug;
extern int hammer_debug_inode;
extern int hammer_debug_locks;
extern int hammer_debug_btree;
extern int hammer_debug_tid;
extern int hammer_debug_recover;
extern int hammer_debug_recover_faults;
extern int hammer_cluster_enable;
extern int hammer_count_fsyncs;
extern int hammer_count_inodes;
extern int hammer_count_iqueued;
extern int hammer_count_reclaiming;
extern int hammer_count_records;
extern int hammer_count_record_datas;
extern int hammer_count_volumes;
extern int hammer_count_buffers;
extern int hammer_count_nodes;
extern int64_t hammer_count_extra_space_used;
extern int64_t hammer_stats_btree_lookups;
extern int64_t hammer_stats_btree_searches;
extern int64_t hammer_stats_btree_inserts;
extern int64_t hammer_stats_btree_deletes;
extern int64_t hammer_stats_btree_elements;
extern int64_t hammer_stats_btree_splits;
extern int64_t hammer_stats_btree_iterations;
extern int64_t hammer_stats_record_iterations;
extern int64_t hammer_stats_file_read;
extern int64_t hammer_stats_file_write;
extern int64_t hammer_stats_file_iopsr;
extern int64_t hammer_stats_file_iopsw;
extern int64_t hammer_stats_disk_read;
extern int64_t hammer_stats_disk_write;
extern int64_t hammer_stats_inode_flushes;
extern int64_t hammer_stats_commits;
extern int hammer_count_dirtybufspace;
extern int hammer_count_refedbufs;
extern int hammer_count_reservations;
extern int hammer_count_io_running_read;
extern int hammer_count_io_running_write;
extern int hammer_count_io_locked;
extern int hammer_limit_dirtybufspace;
extern int hammer_limit_recs;
extern int hammer_bio_count;
extern int hammer_verify_zone;
extern int hammer_verify_data;
extern int hammer_write_mode;
extern int hammer_autoflush;
extern int64_t hammer_contention_count;
void hammer_critical_error(hammer_mount_t hmp, hammer_inode_t ip,
int error, const char *msg);
int hammer_vop_inactive(struct vop_inactive_args *);
int hammer_vop_reclaim(struct vop_reclaim_args *);
int hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp);
struct hammer_inode *hammer_get_inode(hammer_transaction_t trans,
hammer_inode_t dip, int64_t obj_id,
hammer_tid_t asof, u_int32_t localization,
int flags, int *errorp);
void hammer_scan_inode_snapshots(hammer_mount_t hmp,
hammer_inode_info_t iinfo,
int (*callback)(hammer_inode_t ip, void *data),
void *data);
void hammer_put_inode(struct hammer_inode *ip);
void hammer_put_inode_ref(struct hammer_inode *ip);
void hammer_inode_waitreclaims(hammer_mount_t hmp);
void hammer_inode_waithard(hammer_mount_t hmp);
int hammer_unload_volume(hammer_volume_t volume, void *data __unused);
int hammer_adjust_volume_mode(hammer_volume_t volume, void *data __unused);
int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused);
int hammer_install_volume(hammer_mount_t hmp, const char *volname,
struct vnode *devvp);
int hammer_mountcheck_volumes(hammer_mount_t hmp);
int hammer_mem_add(hammer_record_t record);
int hammer_ip_lookup(hammer_cursor_t cursor);
int hammer_ip_first(hammer_cursor_t cursor);
int hammer_ip_next(hammer_cursor_t cursor);
int hammer_ip_resolve_data(hammer_cursor_t cursor);
int hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
hammer_tid_t tid);
int hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
hammer_tid_t delete_tid, u_int32_t delete_ts,
int track, int64_t *stat_bytes);
int hammer_ip_check_directory_empty(hammer_transaction_t trans,
hammer_inode_t ip);
int hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
int hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor);
hammer_record_t
hammer_alloc_mem_record(hammer_inode_t ip, int data_len);
void hammer_flush_record_done(hammer_record_t record, int error);
void hammer_wait_mem_record_ident(hammer_record_t record, const char *ident);
void hammer_rel_mem_record(hammer_record_t record);
int hammer_cursor_up(hammer_cursor_t cursor);
int hammer_cursor_up_locked(hammer_cursor_t cursor);
int hammer_cursor_down(hammer_cursor_t cursor);
int hammer_cursor_upgrade(hammer_cursor_t cursor);
int hammer_cursor_upgrade_node(hammer_cursor_t cursor);
void hammer_cursor_downgrade(hammer_cursor_t cursor);
int hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node,
int index);
void hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident);
int hammer_lock_ex_try(struct hammer_lock *lock);
void hammer_lock_sh(struct hammer_lock *lock);
int hammer_lock_sh_try(struct hammer_lock *lock);
int hammer_lock_upgrade(struct hammer_lock *lock);
void hammer_lock_downgrade(struct hammer_lock *lock);
int hammer_lock_status(struct hammer_lock *lock);
void hammer_unlock(struct hammer_lock *lock);
void hammer_ref(struct hammer_lock *lock);
void hammer_unref(struct hammer_lock *lock);
void hammer_sync_lock_ex(hammer_transaction_t trans);
void hammer_sync_lock_sh(hammer_transaction_t trans);
int hammer_sync_lock_sh_try(hammer_transaction_t trans);
void hammer_sync_unlock(hammer_transaction_t trans);
u_int32_t hammer_to_unix_xid(uuid_t *uuid);
void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
void hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts);
u_int64_t hammer_timespec_to_time(struct timespec *ts);
int hammer_str_to_tid(const char *str, int *ispfsp,
hammer_tid_t *tidp, u_int32_t *localizationp);
int hammer_is_atatext(const char *name, int len);
hammer_tid_t hammer_alloc_objid(hammer_mount_t hmp, hammer_inode_t dip);
void hammer_clear_objid(hammer_inode_t dip);
void hammer_destroy_objid_cache(hammer_mount_t hmp);
int hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset,
int bytes);
void hammer_clear_undo_history(hammer_mount_t hmp);
enum vtype hammer_get_vnode_type(u_int8_t obj_type);
int hammer_get_dtype(u_int8_t obj_type);
u_int8_t hammer_get_obj_type(enum vtype vtype);
int64_t hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
u_int32_t *max_iterationsp);
int hammer_nohistory(hammer_inode_t ip);
int hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
hammer_node_cache_t cache, hammer_inode_t ip);
void hammer_normalize_cursor(hammer_cursor_t cursor);
void hammer_done_cursor(hammer_cursor_t cursor);
int hammer_recover_cursor(hammer_cursor_t cursor);
void hammer_unlock_cursor(hammer_cursor_t cursor, int also_ip);
int hammer_lock_cursor(hammer_cursor_t cursor, int also_ip);
hammer_cursor_t hammer_push_cursor(hammer_cursor_t ocursor);
void hammer_pop_cursor(hammer_cursor_t ocursor, hammer_cursor_t ncursor);
void hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode);
void hammer_cursor_removed_node(hammer_node_t onode, hammer_node_t parent,
int index);
void hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode,
int index);
void hammer_cursor_inserted_element(hammer_node_t node, int index);
void hammer_cursor_deleted_element(hammer_node_t node, int index);
int hammer_btree_lookup(hammer_cursor_t cursor);
int hammer_btree_first(hammer_cursor_t cursor);
int hammer_btree_last(hammer_cursor_t cursor);
int hammer_btree_extract(hammer_cursor_t cursor, int flags);
int hammer_btree_iterate(hammer_cursor_t cursor);
int hammer_btree_iterate_reverse(hammer_cursor_t cursor);
int hammer_btree_insert(hammer_cursor_t cursor,
hammer_btree_leaf_elm_t elm, int *doprop);
int hammer_btree_delete(hammer_cursor_t cursor);
void hammer_btree_do_propagation(hammer_cursor_t cursor,
hammer_pseudofs_inmem_t pfsm,
hammer_btree_leaf_elm_t leaf);
int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
int hammer_btree_chkts(hammer_tid_t ts, hammer_base_elm_t key);
int hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid);
int hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid);
int btree_set_parent(hammer_transaction_t trans, hammer_node_t node,
hammer_btree_elm_t elm);
int hammer_btree_lock_children(hammer_cursor_t cursor,
struct hammer_node_locklist **locklistp);
void hammer_btree_unlock_children(hammer_cursor_t cursor,
struct hammer_node_locklist **locklistp);
int hammer_btree_search_node(hammer_base_elm_t elm, hammer_node_ondisk_t node);
hammer_node_t hammer_btree_get_parent(hammer_transaction_t trans,
hammer_node_t node, int *parent_indexp,
int *errorp, int try_exclusive);
void hammer_print_btree_node(hammer_node_ondisk_t ondisk);
void hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
void *hammer_bread(struct hammer_mount *hmp, hammer_off_t off,
int *errorp, struct hammer_buffer **bufferp);
void *hammer_bnew(struct hammer_mount *hmp, hammer_off_t off,
int *errorp, struct hammer_buffer **bufferp);
void *hammer_bread_ext(struct hammer_mount *hmp, hammer_off_t off, int bytes,
int *errorp, struct hammer_buffer **bufferp);
void *hammer_bnew_ext(struct hammer_mount *hmp, hammer_off_t off, int bytes,
int *errorp, struct hammer_buffer **bufferp);
hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp);
hammer_volume_t hammer_get_volume(hammer_mount_t hmp,
int32_t vol_no, int *errorp);
hammer_buffer_t hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
int bytes, int isnew, int *errorp);