Skip to content

Commit

Permalink
fb-oomd: use scuba-sourced test samples for cfgen where possible
Browse files Browse the repository at this point in the history
Summary:
We recently implemented the option to pull test data from scuba (dataset cfgen_inputs) instead of SSHing to a hardcoded host. The benefits include better security (fewer SSH connections to prod) and a better user experience: there is no need to update the test manifest when hosts die, and, following the do-not-repeat-yourself principle, no need for ad hoc comments on how to update the host if the scuba query is descriptive enough.

Updating fb-oomd manifest with scuba-sourced hosts where possible.

Unfortunately, I couldn't do it for all hosts yet, but this should still be an improvement. I hope we will have richer data in scuba later on and will be able to update the rest as well.

Reviewed By: chengxiong-ruan

Differential Revision: D58533150

fbshipit-source-id: 5a6804d44f9e2e9e05e12e60e4fb2f59ae346b2d
  • Loading branch information
Sergey Anpilov authored and facebook-github-bot committed Jun 13, 2024
1 parent 35a3527 commit 61825d4
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 424 deletions.
28 changes: 15 additions & 13 deletions src/oomd/cfgen/test/cfgen_test_inputs/devbig.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
@generated SignedSource<<896ff729516b5e965bc9b110160cf100>>
@generated SignedSource<<c54e3fdf5805d4074185132a0ce23636>>
@codegen-command arc cfgen update-inputs fb-oomd
{
"fqdn": "devbig284.ash8.facebook.com",
"fqdn": "devbig470.ash8.facebook.com",
"region": "east",
"clusterType": "SERVICE_GENERIC_NON_MEMCACHE",
"modelId": 336209,
"kernelRelease": "5.19.0-0_fbk21_hardened_12633_g4db063a1bcb5",
"kernelRelease": "5.19.0-0_fbk12_hardened_11583_g0bef9520ca2b",
"serverType": "TYPE_VI_FEED",
"experiments": [],
"cpuArchitecture": "skylake",
"metalosRootfs": false,
"provisioningConfig": {
"ethtoolByInterface": {
"eth0": {
"maxChannelsCombined": 63
"maxChannelsCombined": 128,
"maxChannelsRx": 64,
"maxChannelsTx": 64
}
},
"cpuCoreCount": 40,
Expand All @@ -22,7 +24,7 @@
"deviceType": "SERVER",
"datacenter": "ash8",
"cluster": "08",
"memTotal": 270017355776,
"memTotal": 269614637056,
"osVersion": {
"distribution_name": "CentOS Stream release",
"version": 9,
Expand All @@ -31,10 +33,10 @@
},
"pciByAddress": {
"0000:5e:00.0": {
"vendor_id": 5555,
"device_id": 4117,
"vendor_id": 5348,
"device_id": 5833,
"class_code": 131072,
"board_part_number": "MCX4411D-ACAN_FB",
"board_part_number": "BCM957302M3023CBK",
"current_speed_mts": 8000,
"current_width": 8
}
Expand All @@ -45,9 +47,9 @@
"bootConfig": {
"ethtoolByInterface": {
"eth0": {
"driver": "mlx5_core",
"driver_version": "5.19.0-0_fbk21_hardened_12633_g",
"firmware_version": "14.27.2606 (FB_0000000005)",
"driver": "bnxt_en",
"driver_version": "5.19.0-0_fbk12_hardened_11583_g",
"firmware_version": "20.6.167.0/pkg 20.6.4.12",
"bus_info": "0000:5e:00.0",
"speed_mbps": 25000
}
Expand All @@ -62,7 +64,7 @@
"size_bytes": 1920383410176,
"is_rotational": false,
"model": "KXD51LN11T92 TOSHIBA",
"serial": "798S10S0T7RQ",
"serial": "798S10ART7RQ",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": false
Expand All @@ -71,7 +73,7 @@
"size_bytes": 2000398934016,
"is_rotational": true,
"model": "TOSHIBA MG04ACA200A",
"serial": "79REKGBZFKGA",
"serial": "89CVK6BJFKGA",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": true
Expand Down
164 changes: 111 additions & 53 deletions src/oomd/cfgen/test/cfgen_test_inputs/devgpu_T17.json
Original file line number Diff line number Diff line change
@@ -1,43 +1,74 @@
@generated SignedSource<<c1fa5e796716c915704d08118fe9ae17>>
@generated SignedSource<<e6cdcb8327d1e51500f7fc47e816cb6f>>
@codegen-command arc cfgen update-inputs fb-oomd
{
"fqdn": "devgpu109.cln3.facebook.com",
"region": "ireland",
"fqdn": "devgpu119.cco2.facebook.com",
"region": "crookcounty",
"clusterType": "SERVICE_GENERIC_NON_MEMCACHE",
"modelId": 342921,
"kernelRelease": "5.19.0-0_fbk9_fbaccel.phvuisqkhkrv_11421_g3b98c64ebe59",
"modelId": 347673,
"kernelRelease": "5.19.0-0_fbk21_12633_g4db063a1bcb5",
"serverType": "TYPE_XVII_INFERENCE",
"experiments": [],
"cpuArchitecture": "cooperlake",
"cpuArchitecture": "genoa",
"metalosRootfs": false,
"provisioningConfig": {
"ethtoolByInterface": {
"eth0": {
"maxChannelsCombined": 128,
"maxChannelsRx": 64,
"maxChannelsTx": 64
"maxChannelsCombined": 63
},
"eth1": {
"maxChannelsCombined": 63
},
"eth2": {
"maxChannelsCombined": 63
},
"eth3": {
"maxChannelsCombined": 63
}
},
"cpuCoreCount": 26,
"parentModelId": 342524,
"cpuCoreCount": 192,
"parentModelId": 344274,
"recoveryEnvironment": false,
"deviceType": "SERVER",
"datacenter": "cln3",
"datacenter": "cco2",
"cluster": "03",
"memTotal": 99638423552,
"memTotal": 2434647334912,
"osVersion": {
"distribution_name": "CentOS Stream release",
"version": 9,
"is_in_ramdisk": false,
"is_metalos": false
},
"pciByAddress": {
"0000:b3:00.0": {
"vendor_id": 5348,
"device_id": 5969,
"0000:2d:00.0": {
"vendor_id": 5555,
"device_id": 4129,
"class_code": 131072,
"board_part_number": "BCM957504-N1100FXB",
"current_speed_mts": 8000,
"board_part_number": "CX71343DAC-WEBF_FB",
"current_speed_mts": 32000,
"current_width": 16
},
"0000:2d:00.1": {
"vendor_id": 5555,
"device_id": 4129,
"class_code": 131072,
"board_part_number": "CX71343DAC-WEBF_FB",
"current_speed_mts": 32000,
"current_width": 16
},
"0000:cd:00.0": {
"vendor_id": 5555,
"device_id": 4129,
"class_code": 131072,
"board_part_number": "CX71343DAC-WEBF_FB",
"current_speed_mts": 32000,
"current_width": 16
},
"0000:cd:00.1": {
"vendor_id": 5555,
"device_id": 4129,
"class_code": 131072,
"board_part_number": "CX71343DAC-WEBF_FB",
"current_speed_mts": 32000,
"current_width": 16
}
},
Expand All @@ -47,62 +78,83 @@
"bootConfig": {
"ethtoolByInterface": {
"eth0": {
"driver": "bnxt_en",
"driver_version": "5.19.0-0_fbk9_fbaccel.phvuisqkh",
"firmware_version": "220.0.59.0/pkg 220.0.83.0",
"bus_info": "0000:b3:00.0",
"speed_mbps": 100000
"driver": "mlx5_core",
"driver_version": "5.19.0-0_fbk21_12633_g4db063a1b",
"firmware_version": "28.39.1002 (FB_0000000038)",
"bus_info": "0000:2d:00.0",
"speed_mbps": 200000
},
"eth1": {
"driver": "mlx5_core",
"driver_version": "5.19.0-0_fbk21_12633_g4db063a1b",
"firmware_version": "28.39.1002 (FB_0000000038)",
"bus_info": "0000:2d:00.1",
"speed_mbps": 200000
},
"eth2": {
"driver": "mlx5_core",
"driver_version": "5.19.0-0_fbk21_12633_g4db063a1b",
"firmware_version": "28.39.1002 (FB_0000000038)",
"bus_info": "0000:cd:00.0",
"speed_mbps": 200000
},
"eth3": {
"driver": "mlx5_core",
"driver_version": "5.19.0-0_fbk21_12633_g4db063a1b",
"firmware_version": "28.39.1002 (FB_0000000038)",
"bus_info": "0000:cd:00.1",
"speed_mbps": 200000
}
}
},
"runtimeConfig": {
"hasHighPrivCert": true,
"regionRoutableCluster": "cln1.02",
"regionRoutableCluster": "cco1.02",
"block_devices": {
"block_devices": {
"nvme0n1": {
"size_bytes": 400088457216,
"size_bytes": 3760740458496,
"is_rotational": false,
"model": "INTEL SSDPFR1Q400GBF",
"serial": "PHAK2260006X400AGN",
"physical_block_size": 512,
"logical_block_size": 512,
"is_root": true
"model": "MZOL63T8HDLT-00AFB",
"serial": "S77UNG0W800873",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": false
},
"nvme1n1": {
"size_bytes": 400088457216,
"size_bytes": 3760740458496,
"is_rotational": false,
"model": "INTEL SSDPFR1Q400GBF",
"serial": "PHAK2256007W400AGN",
"physical_block_size": 512,
"logical_block_size": 512,
"model": "MZOL63T8HDLT-00AFB",
"serial": "S77UNG0W801214",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": true
},
"nvme2n1": {
"size_bytes": 400088457216,
"size_bytes": 3760740458496,
"is_rotational": false,
"model": "INTEL SSDPFR1Q400GBF",
"serial": "PHAK22600049400AGN",
"physical_block_size": 512,
"logical_block_size": 512,
"is_root": true
"model": "MZOL63T8HDLT-00AFB",
"serial": "S77UNG0W800880",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": false
},
"nvme3n1": {
"size_bytes": 400088457216,
"size_bytes": 3760740458496,
"is_rotational": false,
"model": "INTEL SSDPFR1Q400GBF",
"serial": "PHAK226000VR400AGN",
"physical_block_size": 512,
"logical_block_size": 512,
"is_root": true
"model": "MZOL63T8HDLT-00AFB",
"serial": "S77UNG0W800871",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": false
},
"nvme4n1": {
"size_bytes": 506989088768,
"size_bytes": 3760740458496,
"is_rotational": false,
"model": "KXG6AZNV512G TOSHIBA",
"serial": "82AFC00JF5F5",
"physical_block_size": 512,
"logical_block_size": 512,
"model": "MZOL63T8HDLT-00AFB",
"serial": "S77UNG0W800872",
"physical_block_size": 4096,
"logical_block_size": 4096,
"is_root": false
}
}
Expand All @@ -117,7 +169,13 @@
],
"device_nics_enum": [
"ETH0",
"SVC0"
"ETH1",
"ETH2",
"ETH3",
"SVC0",
"SVC0_1",
"SVC0_2",
"SVC0_3"
],
"loaded_kernel_modules": []
},
Expand Down
Loading

0 comments on commit 61825d4

Please sign in to comment.