Skip to content

Commit

Permalink
intel_extreme: Add RC6 downclocking support
Browse files Browse the repository at this point in the history
* Generation 6 (SandyBridge) and later support
  automatic downclocking of the GPU offering
  substantial battery use reductions.
* As we're playing with fire here, only enable this on
  mobile devices that are SandyBridge or later.
* This tests stable on my SandyBridge laptop;
  however, I need further confirmation that
  this works correctly.
* Move clock gating into a function in the power.cpp
  file
  • Loading branch information
kallisti5 committed Dec 31, 2012
1 parent 87466cc commit e71af5a
Show file tree
Hide file tree
Showing 4 changed files with 306 additions and 37 deletions.
2 changes: 1 addition & 1 deletion src/add-ons/kernel/drivers/graphics/intel_extreme/Jamfile
Expand Up @@ -11,9 +11,9 @@ KernelAddon intel_extreme :
driver.cpp
device.cpp
intel_extreme.cpp
power.cpp

kernel_cpp.cpp

: libgraphicscommon.a
;

Expand Down
Expand Up @@ -10,16 +10,16 @@
#include "intel_extreme.h"

#include "AreaKeeper.h"
#include "driver.h"
#include "utility.h"

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#include <driver_settings.h>
#include <util/kernel_cpp.h>
#include "utility.h"

#include "driver.h"
#include "power.h"


#define TRACE_INTELEXTREME
Expand Down Expand Up @@ -296,38 +296,11 @@ intel_extreme_init(intel_info &info)
primary.offset = (addr_t)primary.base - info.aperture_base;
}

// Clock gating
// Fix some problems on certain chips (taken from X driver)
// TODO: clean this up
if (info.pci->device_id == 0x2a02 || info.pci->device_id == 0x2a12) {
TRACE("i965GM/i965GME quirk\n");
write32(info, 0x6204, (1L << 29));
} else if (info.device_type.InGroup(INTEL_TYPE_SNB)) {
TRACE("SandyBridge clock gating\n");
write32(info, 0x42020, (1L << 28) | (1L << 7) | (1L << 5));
} else if (info.device_type.InGroup(INTEL_TYPE_IVB)) {
TRACE("IvyBridge clock gating\n");
write32(info, 0x42020, (1L << 28));
} else if (info.device_type.InGroup(INTEL_TYPE_ILK)) {
TRACE("IronLake clock gating\n");
write32(info, 0x42020, (1L << 7) | (1L << 5));
} else if (info.device_type.InGroup(INTEL_TYPE_G4x)) {
TRACE("G4x clock gating\n");
write32(info, 0x6204, 0);
write32(info, 0x6208, (1L << 9) | (1L << 7) | (1L << 6));
write32(info, 0x6210, 0);

uint32 gateValue = (1L << 28) | (1L << 3) | (1L << 2);
if ((info.device_type.type & INTEL_TYPE_MOBILE) == INTEL_TYPE_MOBILE) {
TRACE("G4x mobile clock gating\n");
gateValue |= 1L << 18;
}
write32(info, 0x6200, gateValue);
} else {
TRACE("i965 quirk\n");
write32(info, 0x6204, (1L << 29) | (1L << 23));
}
write32(info, 0x7408, 0x10);
// Enable clock gating
intel_en_gating(info);

// Enable automatic gpu downclocking if we can to save power
intel_en_downclock(info);

// no errors, so keep areas and mappings
sharedCreator.Detach();
Expand Down
175 changes: 175 additions & 0 deletions src/add-ons/kernel/drivers/graphics/intel_extreme/power.cpp
@@ -0,0 +1,175 @@
/*
* Copyright 2012-2013, Haiku, Inc. All Rights Reserved.
* Distributed under the terms of the MIT License.
*
* Authors:
* Alexander von Gluck IV, kallisti5@unixzen.com
*/


#include "power.h"


// Logging helpers for this file. TRACE() only produces output while
// TRACE_POWER is defined; ERROR() always prints. Both prepend the same
// "intel_extreme: " prefix (including the separating space) to the message.
#undef TRACE
#define TRACE_POWER
#ifdef TRACE_POWER
#	define TRACE(x...) dprintf("intel_extreme: " x)
#else
#	define TRACE(x...)
#endif

#define ERROR(x...) dprintf("intel_extreme: " x)
// Traces the name of the enclosing function.
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)


status_t
intel_en_gating(intel_info &info)
{
CALLED();
// Fix some problems on certain chips (taken from X driver)
// TODO: clean this up
if (info.pci->device_id == 0x2a02 || info.pci->device_id == 0x2a12) {
TRACE("i965GM/i965GME quirk\n");
write32(info, 0x6204, (1L << 29));
} else if (info.device_type.InGroup(INTEL_TYPE_SNB)) {
TRACE("SandyBridge clock gating\n");
write32(info, 0x42020, (1L << 28) | (1L << 7) | (1L << 5));
} else if (info.device_type.InGroup(INTEL_TYPE_IVB)) {
TRACE("IvyBridge clock gating\n");
write32(info, 0x42020, (1L << 28));
} else if (info.device_type.InGroup(INTEL_TYPE_ILK)) {
TRACE("IronLake clock gating\n");
write32(info, 0x42020, (1L << 7) | (1L << 5));
} else if (info.device_type.InGroup(INTEL_TYPE_G4x)) {
TRACE("G4x clock gating\n");
write32(info, 0x6204, 0);
write32(info, 0x6208, (1L << 9) | (1L << 7) | (1L << 6));
write32(info, 0x6210, 0);

uint32 gateValue = (1L << 28) | (1L << 3) | (1L << 2);
if ((info.device_type.type & INTEL_TYPE_MOBILE) == INTEL_TYPE_MOBILE) {
TRACE("G4x mobile clock gating\n");
gateValue |= 1L << 18;
}
write32(info, 0x6200, gateValue);
} else {
TRACE("i965 quirk\n");
write32(info, 0x6204, (1L << 29) | (1L << 23));
}
write32(info, 0x7408, 0x10);

return B_OK;
}


/*!	Enables automatic GPU downclocking on mobile SandyBridge and IvyBridge
	devices.

	The function programs the INTEL6_RC_* and INTEL6_RP_* registers with the
	sequence borrowed from the Xorg driver, submits a request through the
	INTEL6_PCODE_MAILBOX, clamps the current frequency (read from
	INTEL6_GT_PERF_STATUS) to the range reported by INTEL6_RP_STATE_CAP and
	finally enables and unmasks the deferred PM events.

	\param info The device to program.
	\return B_OK once all registers have been written, B_NOT_ALLOWED if the
		device is neither in the INTEL_TYPE_SNB nor the INTEL_TYPE_IVB group,
		or if it does not carry the INTEL_TYPE_MOBILE flag. Nothing is written
		in the B_NOT_ALLOWED cases.
*/
status_t
intel_en_downclock(intel_info &info)
{
	CALLED();

	if (!info.device_type.InGroup(INTEL_TYPE_SNB)
		&& !info.device_type.InGroup(INTEL_TYPE_IVB)) {
		TRACE("%s: Downclocking not supported on this chipset.\n", __func__);
		return B_NOT_ALLOWED;
	}

	if ((info.device_type.type & INTEL_TYPE_MOBILE) == 0) {
		// I don't see a point enabling auto-downclocking on non-mobile devices.
		TRACE("%s: Skip GPU downclocking on non-mobile device.\n", __func__);
		return B_NOT_ALLOWED;
	}

	// TODO: Check for deep RC6
	// IvyBridge, SandyBridge, and Haswell can do depth 1 atm
	// Some chipsets can go deeper... but this is safe for now
	// Haswell should *NOT* do over depth 1;
	int depth = 1;

	// Let's always print this for now in case it causes regressions for
	// someone.
	ERROR("%s: Enabling Intel GPU auto downclocking depth %d\n", __func__,
		depth);

	/* Magical sequence of register writes to enable
	 * downclocking from the fine folks at Xorg
	 */
	write32(info, INTEL6_RC_STATE, 0);

	uint32 rpStateCapacity = read32(info, INTEL6_RP_STATE_CAP);
	uint32 gtPerfStatus = read32(info, INTEL6_GT_PERF_STATUS);

	// These are 8 bit fields, but they are deliberately kept in uint32:
	// a uint8 would be promoted to (signed) int before being shifted by
	// 24/25 bits below, which overflows into the sign bit for large values.
	uint32 maxDelay = rpStateCapacity & 0xff;
	uint32 minDelay = (rpStateCapacity & 0xff0000) >> 16;

	// Keep RC states disabled while the thresholds are being programmed.
	write32(info, INTEL6_RC_CONTROL, 0);

	write32(info, INTEL6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
	write32(info, INTEL6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
	write32(info, INTEL6_RC6pp_WAKE_RATE_LIMIT, 30);
	write32(info, INTEL6_RC_EVALUATION_INTERVAL, 125000);
	write32(info, INTEL6_RC_IDLE_HYSTERSIS, 25);

	// TODO: Idle each ring

	write32(info, INTEL6_RC_SLEEP, 0);
	write32(info, INTEL6_RC1e_THRESHOLD, 1000);
	write32(info, INTEL6_RC6_THRESHOLD, 50000);
	write32(info, INTEL6_RC6p_THRESHOLD, 100000);
	write32(info, INTEL6_RC6pp_THRESHOLD, 64000);

	// RC6 is always enabled; the deeper states only with a larger depth
	// (which is currently fixed to 1 above).
	uint32 rc6Mask = INTEL6_RC_CTL_RC6_ENABLE;

	if (depth > 1)
		rc6Mask |= INTEL6_RC_CTL_RC6p_ENABLE;
	if (depth > 2)
		rc6Mask |= INTEL6_RC_CTL_RC6pp_ENABLE;

	write32(info, INTEL6_RC_CONTROL, rc6Mask | INTEL6_RC_CTL_EI_MODE(1)
		| INTEL6_RC_CTL_HW_ENABLE);
	write32(info, INTEL6_RPNSWREQ, INTEL6_FREQUENCY(10) | INTEL6_OFFSET(0)
		| INTEL6_AGGRESSIVE_TURBO);
	write32(info, INTEL6_RC_VIDEO_FREQ, INTEL6_FREQUENCY(12));

	write32(info, INTEL6_RP_DOWN_TIMEOUT, 1000000);
	write32(info, INTEL6_RP_INTERRUPT_LIMITS, maxDelay << 24 | minDelay << 16);

	write32(info, INTEL6_RP_UP_THRESHOLD, 59400);
	write32(info, INTEL6_RP_DOWN_THRESHOLD, 245000);
	write32(info, INTEL6_RP_UP_EI, 66000);
	write32(info, INTEL6_RP_DOWN_EI, 350000);

	write32(info, INTEL6_RP_IDLE_HYSTERSIS, 10);
	write32(info, INTEL6_RP_CONTROL, INTEL6_RP_MEDIA_TURBO
		| INTEL6_RP_MEDIA_HW_NORMAL_MODE | INTEL6_RP_MEDIA_IS_GFX
		| INTEL6_RP_ENABLE | INTEL6_RP_UP_BUSY_AVG
		| INTEL6_RP_DOWN_IDLE_CONT);
		// TODO: | (HASWELL ? GEN7_RP_DOWN_IDLE_AVG : INTEL6_RP_DOWN_IDLE_CONT));

	// NOTE(review): the mailbox is written without polling
	// INTEL6_PCODE_READY before or after the request -- see the TODOs.
	// TODO: wait for (read32(INTEL6_PCODE_MAILBOX) & INTEL6_PCODE_READY)
	write32(info, INTEL6_PCODE_DATA, 0);
	write32(info, INTEL6_PCODE_MAILBOX, INTEL6_PCODE_READY
		| INTEL6_PCODE_WRITE_MIN_FREQ_TABLE);
	// TODO: wait for (read32(INTEL6_PCODE_MAILBOX) & INTEL6_PCODE_READY)

	// TODO: check for overclock support and set.

	// Calculate limits and enforce them: the current frequency field is
	// clamped to [minDelay, maxDelay]. The lower limit is only written when
	// the frequency had to be raised to (or already equals) the minimum.
	uint32 gtPerfShift = (gtPerfStatus & 0xff00) >> 8;
	if (gtPerfShift >= maxDelay)
		gtPerfShift = maxDelay;
	uint32 limits = maxDelay << 24;
	if (gtPerfShift <= minDelay) {
		gtPerfShift = minDelay;
		limits |= minDelay << 16;
	}
	write32(info, INTEL6_RP_INTERRUPT_LIMITS, limits);

	write32(info, INTEL6_RPNSWREQ, INTEL6_FREQUENCY(gtPerfShift)
		| INTEL6_OFFSET(0) | INTEL6_AGGRESSIVE_TURBO);

	// Requires MSI to be enabled.
	write32(info, INTEL6_PMIER, INTEL6_PM_DEFERRED_EVENTS);
	// TODO: Review need for spin lock irq rps here?
	write32(info, INTEL6_PMIMR, 0);
	// TODO: Review need for spin unlock irq rps here?
	write32(info, INTEL6_PMINTRMSK, 0);

	return B_OK;
}
121 changes: 121 additions & 0 deletions src/add-ons/kernel/drivers/graphics/intel_extreme/power.h
@@ -0,0 +1,121 @@
/*
* Copyright 2012-2013, Haiku, Inc. All Rights Reserved.
* Distributed under the terms of the MIT License.
*
* Authors:
* Alexander von Gluck IV, kallisti5@unixzen.com
*/
#ifndef _INTEL_POWER_H_
#define _INTEL_POWER_H_


#include <string.h>

#include "driver.h"


// Clocking configuration
//
// Register offsets and bit fields used for clock / power state control.
// Single bits that occupy bit 31 use an unsigned constant, and the macros
// that shift a caller supplied value cast it to uint32 first, so that no
// shift ever overflows into the sign bit of a signed int.
#define INTEL6_GT_THREAD_STATUS_REG				0x13805c
#define INTEL6_GT_THREAD_STATUS_CORE_MASK		0x7
#define INTEL6_GT_THREAD_STATUS_CORE_MASK_HSW	(0x7 | (0x07 << 16))
#define INTEL6_GT_PERF_STATUS					0x145948
#define INTEL6_RP_STATE_LIMITS					0x145994
#define INTEL6_RP_STATE_CAP						0x145998
#define INTEL6_RPNSWREQ							0xA008
#define INTEL6_TURBO_DISABLE					(1U<<31)
#define INTEL6_FREQUENCY(x)						((uint32)(x)<<25)
#define INTEL6_OFFSET(x)						((uint32)(x)<<19)
#define INTEL6_AGGRESSIVE_TURBO					(0<<15)
#define INTEL6_RC_VIDEO_FREQ					0xA00C
#define INTEL6_RC_CONTROL						0xA090
#define INTEL6_RC_CTL_RC6pp_ENABLE				(1<<16)
#define INTEL6_RC_CTL_RC6p_ENABLE				(1<<17)
#define INTEL6_RC_CTL_RC6_ENABLE				(1<<18)
#define INTEL6_RC_CTL_RC1e_ENABLE				(1<<20)
#define INTEL6_RC_CTL_RC7_ENABLE				(1<<22)
#define INTEL6_RC_CTL_EI_MODE(x)				((uint32)(x)<<27)
#define INTEL6_RC_CTL_HW_ENABLE					(1U<<31)
#define INTEL6_RP_DOWN_TIMEOUT					0xA010
#define INTEL6_RP_INTERRUPT_LIMITS				0xA014
#define INTEL6_RPSTAT1							0xA01C
#define INTEL6_CAGF_SHIFT						8
#define INTEL6_CAGF_MASK						(0x7f << INTEL6_CAGF_SHIFT)
#define INTEL6_RP_CONTROL						0xA024
#define INTEL6_RP_MEDIA_TURBO					(1<<11)
#define INTEL6_RP_MEDIA_MODE_MASK				(3<<9)
#define INTEL6_RP_MEDIA_HW_TURBO_MODE			(3<<9)
#define INTEL6_RP_MEDIA_HW_NORMAL_MODE			(2<<9)
#define INTEL6_RP_MEDIA_HW_MODE					(1<<9)
#define INTEL6_RP_MEDIA_SW_MODE					(0<<9)
#define INTEL6_RP_MEDIA_IS_GFX					(1<<8)
#define INTEL6_RP_ENABLE						(1<<7)
#define INTEL6_RP_UP_IDLE_MIN					(0x1<<3)
#define INTEL6_RP_UP_BUSY_AVG					(0x2<<3)
#define INTEL6_RP_UP_BUSY_CONT					(0x4<<3)
#define GEN7_RP_DOWN_IDLE_AVG					(0x2<<0)
#define INTEL6_RP_DOWN_IDLE_CONT				(0x1<<0)
#define INTEL6_RP_UP_THRESHOLD					0xA02C
#define INTEL6_RP_DOWN_THRESHOLD				0xA030
#define INTEL6_RP_CUR_UP_EI						0xA050
#define INTEL6_CURICONT_MASK					0xffffff
#define INTEL6_RP_CUR_UP						0xA054
#define INTEL6_CURBSYTAVG_MASK					0xffffff
#define INTEL6_RP_PREV_UP						0xA058
#define INTEL6_RP_CUR_DOWN_EI					0xA05C
#define INTEL6_CURIAVG_MASK						0xffffff
#define INTEL6_RP_CUR_DOWN						0xA060
#define INTEL6_RP_PREV_DOWN						0xA064
#define INTEL6_RP_UP_EI							0xA068
#define INTEL6_RP_DOWN_EI						0xA06C
#define INTEL6_RP_IDLE_HYSTERSIS				0xA070
#define INTEL6_RC_STATE							0xA094
#define INTEL6_RC1_WAKE_RATE_LIMIT				0xA098
#define INTEL6_RC6_WAKE_RATE_LIMIT				0xA09C
#define INTEL6_RC6pp_WAKE_RATE_LIMIT			0xA0A0
#define INTEL6_RC_EVALUATION_INTERVAL			0xA0A8
#define INTEL6_RC_IDLE_HYSTERSIS				0xA0AC
#define INTEL6_RC_SLEEP							0xA0B0
#define INTEL6_RC1e_THRESHOLD					0xA0B4
#define INTEL6_RC6_THRESHOLD					0xA0B8
#define INTEL6_RC6p_THRESHOLD					0xA0BC
#define INTEL6_RC6pp_THRESHOLD					0xA0C0
#define INTEL6_PMINTRMSK						0xA168
#define INTEL6_PMISR							0x44020
#define INTEL6_PMIMR							0x44024 /* rps_lock */
#define INTEL6_PMIIR							0x44028
#define INTEL6_PMIER							0x4402C
#define INTEL6_PM_MBOX_EVENT					(1<<25)
#define INTEL6_PM_THERMAL_EVENT					(1<<24)
#define INTEL6_PM_RP_DOWN_TIMEOUT				(1<<6)
#define INTEL6_PM_RP_UP_THRESHOLD				(1<<5)
#define INTEL6_PM_RP_DOWN_THRESHOLD				(1<<4)
#define INTEL6_PM_RP_UP_EI_EXPIRED				(1<<2)
#define INTEL6_PM_RP_DOWN_EI_EXPIRED			(1<<1)
#define INTEL6_PM_DEFERRED_EVENTS				(INTEL6_PM_RP_UP_THRESHOLD \
													| INTEL6_PM_RP_DOWN_THRESHOLD \
													| INTEL6_PM_RP_DOWN_TIMEOUT)
#define INTEL6_GT_GFX_RC6_LOCKED				0x138104
#define INTEL6_GT_GFX_RC6						0x138108
#define INTEL6_GT_GFX_RC6p						0x13810C
#define INTEL6_GT_GFX_RC6pp						0x138110
#define INTEL6_PCODE_MAILBOX					0x138124
#define INTEL6_PCODE_READY						(1U<<31)
#define INTEL6_READ_OC_PARAMS					0xc
#define INTEL6_PCODE_WRITE_MIN_FREQ_TABLE		0x8
#define INTEL6_PCODE_READ_MIN_FREQ_TABLE		0x9
#define INTEL6_PCODE_DATA						0x138128
#define INTEL6_PCODE_FREQ_IA_RATIO_SHIFT		8
#define INTEL6_GT_CORE_STATUS					0x138060
#define INTEL6_CORE_CPD_STATE_MASK				(7<<4)
#define INTEL6_RCn_MASK							7
#define INTEL6_RC0								0
#define INTEL6_RC3								2
#define INTEL6_RC6								3
#define INTEL6_RC7								4


// Writes the chipset specific clock gating registers; returns B_OK.
status_t intel_en_gating(intel_info &info);
// Enables automatic GPU downclocking on mobile SandyBridge/IvyBridge devices.
// Returns B_NOT_ALLOWED (without touching the hardware) on any other device,
// B_OK otherwise.
status_t intel_en_downclock(intel_info &info);


#endif /* _INTEL_POWER_H_ */

0 comments on commit e71af5a

Please sign in to comment.