From e71af5ae6c95d8936efeb5f46640136819e57025 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Mon, 31 Dec 2012 16:35:22 -0600 Subject: [PATCH] intel_extreme: Add RC6 downclocking support * Generation 6 (SandyBridge) and later support automatic downclocking of the GPU offering substantial battery use reductions. * As we're playing with fire here, only use on mobile devices SandyBridge or later. * This is testing stable on my SandyBridge laptop, however I need further confirmation of the functionality of this. * Move clock gating into a function in the power.cpp file --- .../drivers/graphics/intel_extreme/Jamfile | 2 +- .../graphics/intel_extreme/intel_extreme.cpp | 45 +---- .../drivers/graphics/intel_extreme/power.cpp | 175 ++++++++++++++++++ .../drivers/graphics/intel_extreme/power.h | 121 ++++++++++++ 4 files changed, 306 insertions(+), 37 deletions(-) create mode 100644 src/add-ons/kernel/drivers/graphics/intel_extreme/power.cpp create mode 100644 src/add-ons/kernel/drivers/graphics/intel_extreme/power.h diff --git a/src/add-ons/kernel/drivers/graphics/intel_extreme/Jamfile b/src/add-ons/kernel/drivers/graphics/intel_extreme/Jamfile index 54e1a5e9f0b..27611643cca 100644 --- a/src/add-ons/kernel/drivers/graphics/intel_extreme/Jamfile +++ b/src/add-ons/kernel/drivers/graphics/intel_extreme/Jamfile @@ -11,9 +11,9 @@ KernelAddon intel_extreme : driver.cpp device.cpp intel_extreme.cpp + power.cpp kernel_cpp.cpp - : libgraphicscommon.a ; diff --git a/src/add-ons/kernel/drivers/graphics/intel_extreme/intel_extreme.cpp b/src/add-ons/kernel/drivers/graphics/intel_extreme/intel_extreme.cpp index 462976a9984..a6a8c811782 100644 --- a/src/add-ons/kernel/drivers/graphics/intel_extreme/intel_extreme.cpp +++ b/src/add-ons/kernel/drivers/graphics/intel_extreme/intel_extreme.cpp @@ -10,16 +10,16 @@ #include "intel_extreme.h" #include "AreaKeeper.h" -#include "driver.h" -#include "utility.h" - #include #include #include #include - #include #include +#include 
"utility.h" + +#include "driver.h" +#include "power.h" #define TRACE_INTELEXTREME @@ -296,38 +296,11 @@ intel_extreme_init(intel_info &info) primary.offset = (addr_t)primary.base - info.aperture_base; } - // Clock gating - // Fix some problems on certain chips (taken from X driver) - // TODO: clean this up - if (info.pci->device_id == 0x2a02 || info.pci->device_id == 0x2a12) { - TRACE("i965GM/i965GME quirk\n"); - write32(info, 0x6204, (1L << 29)); - } else if (info.device_type.InGroup(INTEL_TYPE_SNB)) { - TRACE("SandyBridge clock gating\n"); - write32(info, 0x42020, (1L << 28) | (1L << 7) | (1L << 5)); - } else if (info.device_type.InGroup(INTEL_TYPE_IVB)) { - TRACE("IvyBridge clock gating\n"); - write32(info, 0x42020, (1L << 28)); - } else if (info.device_type.InGroup(INTEL_TYPE_ILK)) { - TRACE("IronLake clock gating\n"); - write32(info, 0x42020, (1L << 7) | (1L << 5)); - } else if (info.device_type.InGroup(INTEL_TYPE_G4x)) { - TRACE("G4x clock gating\n"); - write32(info, 0x6204, 0); - write32(info, 0x6208, (1L << 9) | (1L << 7) | (1L << 6)); - write32(info, 0x6210, 0); - - uint32 gateValue = (1L << 28) | (1L << 3) | (1L << 2); - if ((info.device_type.type & INTEL_TYPE_MOBILE) == INTEL_TYPE_MOBILE) { - TRACE("G4x mobile clock gating\n"); - gateValue |= 1L << 18; - } - write32(info, 0x6200, gateValue); - } else { - TRACE("i965 quirk\n"); - write32(info, 0x6204, (1L << 29) | (1L << 23)); - } - write32(info, 0x7408, 0x10); + // Enable clock gating + intel_en_gating(info); + + // Enable automatic gpu downclocking if we can to save power + intel_en_downclock(info); // no errors, so keep areas and mappings sharedCreator.Detach(); diff --git a/src/add-ons/kernel/drivers/graphics/intel_extreme/power.cpp b/src/add-ons/kernel/drivers/graphics/intel_extreme/power.cpp new file mode 100644 index 00000000000..9c4f6c7e8bb --- /dev/null +++ b/src/add-ons/kernel/drivers/graphics/intel_extreme/power.cpp @@ -0,0 +1,175 @@ +/* + * Copyright 2012-2013, Haiku, Inc. All Rights Reserved. 
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Alexander von Gluck IV, kallisti5@unixzen.com
+ */
+
+
+#include "power.h"
+
+
+#undef TRACE
+#define TRACE_POWER
+#ifdef TRACE_POWER
+#	define TRACE(x...) dprintf("intel_extreme: " x)
+#else
+#	define TRACE(x...)
+#endif
+
+#define ERROR(x...) dprintf("intel_extreme: " x)
+#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
+
+
+status_t
+intel_en_gating(intel_info &info)
+{
+	CALLED();
+	// Per-chipset clock gating fixups (register values taken from X driver).
+	// TODO: replace the raw offsets and bit shifts with named constants
+	if (info.pci->device_id == 0x2a02 || info.pci->device_id == 0x2a12) {
+		TRACE("i965GM/i965GME quirk\n");
+		write32(info, 0x6204, (1L << 29));
+	} else if (info.device_type.InGroup(INTEL_TYPE_SNB)) {
+		TRACE("SandyBridge clock gating\n");
+		write32(info, 0x42020, (1L << 28) | (1L << 7) | (1L << 5));
+	} else if (info.device_type.InGroup(INTEL_TYPE_IVB)) {
+		TRACE("IvyBridge clock gating\n");
+		write32(info, 0x42020, (1L << 28));
+	} else if (info.device_type.InGroup(INTEL_TYPE_ILK)) {
+		TRACE("IronLake clock gating\n");
+		write32(info, 0x42020, (1L << 7) | (1L << 5));
+	} else if (info.device_type.InGroup(INTEL_TYPE_G4x)) {
+		TRACE("G4x clock gating\n");
+		write32(info, 0x6204, 0);
+		write32(info, 0x6208, (1L << 9) | (1L << 7) | (1L << 6));
+		write32(info, 0x6210, 0);
+
+		uint32 gateValue = (1L << 28) | (1L << 3) | (1L << 2);
+		if ((info.device_type.type & INTEL_TYPE_MOBILE) == INTEL_TYPE_MOBILE) {
+			TRACE("G4x mobile clock gating\n");
+			gateValue |= 1L << 18;
+		}
+		write32(info, 0x6200, gateValue);
+	} else {
+		TRACE("i965 quirk\n");
+		write32(info, 0x6204, (1L << 29) | (1L << 23));
+	}
+	write32(info, 0x7408, 0x10);
+
+	return B_OK;
+}
+
+
+status_t
+intel_en_downclock(intel_info &info)
+{
+	CALLED();
+
+	if (!info.device_type.InGroup(INTEL_TYPE_SNB)
+		&& !info.device_type.InGroup(INTEL_TYPE_IVB)) {
+		TRACE("%s: Downclocking not supported on this chipset.\n", __func__);
+		return B_NOT_ALLOWED;
+	}
+
+	if ((info.device_type.type & INTEL_TYPE_MOBILE) != INTEL_TYPE_MOBILE) {
+		// No point enabling auto-downclocking on non-mobile devices.
+		TRACE("%s: Skip GPU downclocking on non-mobile device.\n", __func__);
+		return B_NOT_ALLOWED;
+	}
+	// TODO: Check for deep RC6
+	// IvyBridge, SandyBridge, and Haswell can do depth 1 atm
+	// Some chipsets can go deeper... but this is safe for now
+	// Haswell should *NOT* do over depth 1;
+	int depth = 1;
+
+	// Always print this for now in case it causes regressions for someone.
+	ERROR("%s: Enabling Intel GPU auto downclocking depth %d\n", __func__,
+		depth);
+
+	/* Magical sequence of register writes to enable downclocking from the
+	 * fine folks at Xorg. Delay values are kept as uint32 so that shifting
+	 * them into the upper register bits below is done unsigned. */
+	write32(info, INTEL6_RC_STATE, 0);
+
+	uint32 rpStateCapacity = read32(info, INTEL6_RP_STATE_CAP);
+	uint32 gtPerfStatus = read32(info, INTEL6_GT_PERF_STATUS);
+	uint32 maxDelay = rpStateCapacity & 0xff;
+	uint32 minDelay = (rpStateCapacity & 0xff0000) >> 16;
+
+	write32(info, INTEL6_RC_CONTROL, 0);
+
+	write32(info, INTEL6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+	write32(info, INTEL6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+	write32(info, INTEL6_RC6pp_WAKE_RATE_LIMIT, 30);
+	write32(info, INTEL6_RC_EVALUATION_INTERVAL, 125000);
+	write32(info, INTEL6_RC_IDLE_HYSTERSIS, 25);
+
+	// TODO: Idle each ring
+
+	write32(info, INTEL6_RC_SLEEP, 0);
+	write32(info, INTEL6_RC1e_THRESHOLD, 1000);
+	write32(info, INTEL6_RC6_THRESHOLD, 50000);
+	write32(info, INTEL6_RC6p_THRESHOLD, 100000);
+	write32(info, INTEL6_RC6pp_THRESHOLD, 64000);
+
+	uint32 rc6Mask = INTEL6_RC_CTL_RC6_ENABLE;
+
+	if (depth > 1)
+		rc6Mask |= INTEL6_RC_CTL_RC6p_ENABLE;
+	if (depth > 2)
+		rc6Mask |= INTEL6_RC_CTL_RC6pp_ENABLE;
+
+	write32(info, INTEL6_RC_CONTROL, rc6Mask | INTEL6_RC_CTL_EI_MODE(1)
+		| INTEL6_RC_CTL_HW_ENABLE);
+	write32(info, INTEL6_RPNSWREQ, INTEL6_FREQUENCY(10) | INTEL6_OFFSET(0)
+		| INTEL6_AGGRESSIVE_TURBO);
+	write32(info, INTEL6_RC_VIDEO_FREQ, INTEL6_FREQUENCY(12));
+
+	write32(info, INTEL6_RP_DOWN_TIMEOUT, 1000000);
+	write32(info, INTEL6_RP_INTERRUPT_LIMITS, maxDelay << 24 | minDelay << 16);
+
+	write32(info, INTEL6_RP_UP_THRESHOLD, 59400);
+	write32(info, INTEL6_RP_DOWN_THRESHOLD, 245000);
+	write32(info, INTEL6_RP_UP_EI, 66000);
+	write32(info, INTEL6_RP_DOWN_EI, 350000);
+
+	write32(info, INTEL6_RP_IDLE_HYSTERSIS, 10);
+	write32(info, INTEL6_RP_CONTROL, INTEL6_RP_MEDIA_TURBO
+		| INTEL6_RP_MEDIA_HW_NORMAL_MODE | INTEL6_RP_MEDIA_IS_GFX
+		| INTEL6_RP_ENABLE | INTEL6_RP_UP_BUSY_AVG
+		| INTEL6_RP_DOWN_IDLE_CONT);
+		// TODO: | (HASWELL ? GEN7_RP_DOWN_IDLE_AVG : INTEL6_RP_DOWN_IDLE_CONT));
+
+	// TODO: wait for (read32(INTEL6_PCODE_MAILBOX) & INTEL6_PCODE_READY)
+	write32(info, INTEL6_PCODE_DATA, 0);
+	write32(info, INTEL6_PCODE_MAILBOX, INTEL6_PCODE_READY
+		| INTEL6_PCODE_WRITE_MIN_FREQ_TABLE);
+	// TODO: wait for (read32(INTEL6_PCODE_MAILBOX) & INTEL6_PCODE_READY)
+
+	// TODO: check for overclock support and set.
+
+	// Calculate limits and enforce them
+	uint32 gtPerfShift = (gtPerfStatus & 0xff00) >> 8;
+	if (gtPerfShift >= maxDelay)
+		gtPerfShift = maxDelay;
+	uint32 limits = maxDelay << 24;
+	if (gtPerfShift <= minDelay) {
+		gtPerfShift = minDelay;
+		limits |= minDelay << 16;
+	}
+	write32(info, INTEL6_RP_INTERRUPT_LIMITS, limits);
+
+	write32(info, INTEL6_RPNSWREQ, INTEL6_FREQUENCY(gtPerfShift)
+		| INTEL6_OFFSET(0) | INTEL6_AGGRESSIVE_TURBO);
+
+	// Requires MSI to be enabled.
+	write32(info, INTEL6_PMIER, INTEL6_PM_DEFERRED_EVENTS);
+	// TODO: Review need for spin lock irq rps here?
+	write32(info, INTEL6_PMIMR, 0);
+	// TODO: Review need for spin unlock irq rps here?
+	write32(info, INTEL6_PMINTRMSK, 0);
+
+	return B_OK;
+}
diff --git a/src/add-ons/kernel/drivers/graphics/intel_extreme/power.h b/src/add-ons/kernel/drivers/graphics/intel_extreme/power.h
new file mode 100644
index 00000000000..c75acef70dd
--- /dev/null
+++ b/src/add-ons/kernel/drivers/graphics/intel_extreme/power.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2012-2013, Haiku, Inc. All Rights Reserved.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Alexander von Gluck IV, kallisti5@unixzen.com
+ */
+#ifndef _INTEL_POWER_H_
+#define _INTEL_POWER_H_
+
+
+#include
+
+#include "driver.h"
+
+
+// Clocking configuration
+#define INTEL6_GT_THREAD_STATUS_REG 0x13805c
+#define INTEL6_GT_THREAD_STATUS_CORE_MASK 0x7
+#define INTEL6_GT_THREAD_STATUS_CORE_MASK_HSW (0x7 | (0x07 << 16))
+#define INTEL6_GT_PERF_STATUS 0x145948
+#define INTEL6_RP_STATE_LIMITS 0x145994
+#define INTEL6_RP_STATE_CAP 0x145998
+#define INTEL6_RPNSWREQ 0xA008
+#define INTEL6_TURBO_DISABLE (1U<<31)
+#define INTEL6_FREQUENCY(x) ((x)<<25)
+#define INTEL6_OFFSET(x) ((x)<<19)
+#define INTEL6_AGGRESSIVE_TURBO (0<<15)
+#define INTEL6_RC_VIDEO_FREQ 0xA00C
+#define INTEL6_RC_CONTROL 0xA090
+#define INTEL6_RC_CTL_RC6pp_ENABLE (1<<16)
+#define INTEL6_RC_CTL_RC6p_ENABLE (1<<17)
+#define INTEL6_RC_CTL_RC6_ENABLE (1<<18)
+#define INTEL6_RC_CTL_RC1e_ENABLE (1<<20)
+#define INTEL6_RC_CTL_RC7_ENABLE (1<<22)
+#define INTEL6_RC_CTL_EI_MODE(x) ((x)<<27)
+#define INTEL6_RC_CTL_HW_ENABLE (1U<<31)
+#define INTEL6_RP_DOWN_TIMEOUT 0xA010
+#define INTEL6_RP_INTERRUPT_LIMITS 0xA014
+#define INTEL6_RPSTAT1 0xA01C
+#define INTEL6_CAGF_SHIFT 8
+#define INTEL6_CAGF_MASK (0x7f << INTEL6_CAGF_SHIFT)
+#define INTEL6_RP_CONTROL 0xA024
+#define INTEL6_RP_MEDIA_TURBO (1<<11)
+#define INTEL6_RP_MEDIA_MODE_MASK (3<<9)
+#define INTEL6_RP_MEDIA_HW_TURBO_MODE (3<<9)
+#define INTEL6_RP_MEDIA_HW_NORMAL_MODE (2<<9)
+#define INTEL6_RP_MEDIA_HW_MODE (1<<9)
+#define INTEL6_RP_MEDIA_SW_MODE (0<<9)
+#define INTEL6_RP_MEDIA_IS_GFX (1<<8)
+#define INTEL6_RP_ENABLE (1<<7)
+#define INTEL6_RP_UP_IDLE_MIN (0x1<<3)
+#define INTEL6_RP_UP_BUSY_AVG (0x2<<3)
+#define INTEL6_RP_UP_BUSY_CONT (0x4<<3)
+#define GEN7_RP_DOWN_IDLE_AVG (0x2<<0)
+#define INTEL6_RP_DOWN_IDLE_CONT (0x1<<0)
+#define INTEL6_RP_UP_THRESHOLD 0xA02C
+#define INTEL6_RP_DOWN_THRESHOLD 0xA030
+#define INTEL6_RP_CUR_UP_EI 0xA050
+#define INTEL6_CURICONT_MASK 0xffffff
+#define INTEL6_RP_CUR_UP 0xA054
+#define INTEL6_CURBSYTAVG_MASK 0xffffff
+#define INTEL6_RP_PREV_UP 0xA058
+#define INTEL6_RP_CUR_DOWN_EI 0xA05C
+#define INTEL6_CURIAVG_MASK 0xffffff
+#define INTEL6_RP_CUR_DOWN 0xA060
+#define INTEL6_RP_PREV_DOWN 0xA064
+#define INTEL6_RP_UP_EI 0xA068
+#define INTEL6_RP_DOWN_EI 0xA06C
+#define INTEL6_RP_IDLE_HYSTERSIS 0xA070
+#define INTEL6_RC_STATE 0xA094
+#define INTEL6_RC1_WAKE_RATE_LIMIT 0xA098
+#define INTEL6_RC6_WAKE_RATE_LIMIT 0xA09C
+#define INTEL6_RC6pp_WAKE_RATE_LIMIT 0xA0A0
+#define INTEL6_RC_EVALUATION_INTERVAL 0xA0A8
+#define INTEL6_RC_IDLE_HYSTERSIS 0xA0AC
+#define INTEL6_RC_SLEEP 0xA0B0
+#define INTEL6_RC1e_THRESHOLD 0xA0B4
+#define INTEL6_RC6_THRESHOLD 0xA0B8
+#define INTEL6_RC6p_THRESHOLD 0xA0BC
+#define INTEL6_RC6pp_THRESHOLD 0xA0C0
+#define INTEL6_PMINTRMSK 0xA168
+#define INTEL6_PMISR 0x44020
+#define INTEL6_PMIMR 0x44024 /* rps_lock */
+#define INTEL6_PMIIR 0x44028
+#define INTEL6_PMIER 0x4402C
+#define INTEL6_PM_MBOX_EVENT (1<<25)
+#define INTEL6_PM_THERMAL_EVENT (1<<24)
+#define INTEL6_PM_RP_DOWN_TIMEOUT (1<<6)
+#define INTEL6_PM_RP_UP_THRESHOLD (1<<5)
+#define INTEL6_PM_RP_DOWN_THRESHOLD (1<<4)
+#define INTEL6_PM_RP_UP_EI_EXPIRED (1<<2)
+#define INTEL6_PM_RP_DOWN_EI_EXPIRED (1<<1)
+#define INTEL6_PM_DEFERRED_EVENTS (INTEL6_PM_RP_UP_THRESHOLD \
+	| INTEL6_PM_RP_DOWN_THRESHOLD \
+	| INTEL6_PM_RP_DOWN_TIMEOUT)
+#define INTEL6_GT_GFX_RC6_LOCKED 0x138104
+#define INTEL6_GT_GFX_RC6 0x138108
+#define INTEL6_GT_GFX_RC6p 0x13810C
+#define INTEL6_GT_GFX_RC6pp 0x138110
+#define INTEL6_PCODE_MAILBOX 0x138124
+#define INTEL6_PCODE_READY (1U<<31)
+#define INTEL6_READ_OC_PARAMS 0xc
+#define INTEL6_PCODE_WRITE_MIN_FREQ_TABLE 0x8
+#define INTEL6_PCODE_READ_MIN_FREQ_TABLE 0x9
+#define INTEL6_PCODE_DATA 0x138128
+#define INTEL6_PCODE_FREQ_IA_RATIO_SHIFT 8
+#define INTEL6_GT_CORE_STATUS 0x138060
+#define INTEL6_CORE_CPD_STATE_MASK (7<<4)
+#define INTEL6_RCn_MASK 7
+#define INTEL6_RC0 0
+#define INTEL6_RC3 2
+#define INTEL6_RC6 3
+#define INTEL6_RC7 4
+
+
+// Applies the chipset specific clock gating register setup. Returns B_OK.
+status_t intel_en_gating(intel_info &info);
+// Enables automatic GPU downclocking (RC6) on mobile SandyBridge/IvyBridge.
+// Returns B_NOT_ALLOWED, without touching any register, on other devices.
+status_t intel_en_downclock(intel_info &info);
+
+
+#endif /* _INTEL_POWER_H_ */