<img style="float: left;" src='Figures/iteso.jpg' width="100" height="200"/>

# <center> <font color= #000047> Potencias </font> </center>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load 'numericos.csv' into df and preview its first rows.
df=pd.read_csv('numericos.csv')
df.head()

In [None]:
# Skewness of each column of df, before any transformation is applied.
df.skew()

In [None]:
# Histograms of the columns of df, 50 bins each, to inspect their distributions.
df.hist(bins=50)

In [None]:
# Compensating variable '0'
df_copia=df.copy() # transformed columns are written to this copy; df itself is not modified
x=df['0']-df['0'].min()+1 # Variable shifted so that its minimum value is 1

In [None]:
a=-11 # exponent of the power transform x' = x**a used for variable '0'
df_copia['0']=x**a # x is the shifted version of column '0'
df_copia['0'].skew() # skewness of the transformed column

In [None]:
# Compensating variable '1'
x=df['1']-df['1'].min()+1 # Variable shifted so that its minimum value is 1

In [None]:
a=17 # exponent of the power transform x' = x**a used for variable '1'
df_copia['1']=x**a # x is the shifted version of column '1'
df_copia['1'].skew() # skewness of the transformed column

In [None]:
# Compensating variable '2'
x=df['2']-df['2'].min()+1 # Variable shifted so that its minimum value is 1

In [None]:
a=3 # exponent of the power transform x' = x**a used for variable '2'
df_copia['2']=x**a # x is the shifted version of column '2'
df_copia['2'].skew() # skewness of the transformed column

Aplicando una transformación de la forma
$$
x'=x^a,
$$
donde $x\in[1,\infty)$, obtuvimos los siguientes exponentes para cada variable:

Variable|Sesgo original|$a$
--|--|--
'0'|6.0|-11
'1'|-2.77|17
'2'|-1.01|3

---

# Familia de transformaciones de potencia
$$
x'=\frac{x^\lambda-1}{\lambda},
$$
y al tomar el límite $\displaystyle\lim_{\lambda\rightarrow0}\frac{x^\lambda-1}{\lambda}$ se obtiene
$$
x'=\ln(x).
$$

In [None]:
LMB=[-1,0.5,0,2,3,4] # exponents (lambda values) whose curves are compared
x=np.linspace(0.1,10,100) # 100 evenly spaced points in [0.1, 10]

In [None]:
def potencia(x,lmb):
    """Power-family transform of ``x`` for the exponent ``lmb``.

    Parameters
    ----------
    x : scalar or array
        Values to transform.
    lmb : scalar
        Exponent (lambda) of the transform.

    Returns
    -------
    ``(x**lmb - 1) / lmb`` when ``lmb`` is non-zero, and ``np.log(x)`` when
    ``lmb`` equals zero (the limit of the first expression as lmb -> 0).
    """
    if lmb!=0:
        return (x**lmb-1)/lmb
    # lmb == 0: limiting case of the family
    return np.log(x)

In [None]:
# One curve per exponent in LMB; the legend entry shows the exponent used.
for lmb in LMB:
    xp=potencia(x,lmb)
    plt.plot(x,xp,label='$x^{{({})}}$'.format(lmb))
# Dashed black reference line x-1, i.e. the transform for an exponent of 1.
plt.plot(x,x-1,'--k')
plt.legend()
plt.ylim(-0.1,10)
plt.grid()

## Transformación Box-Cox
$$
x'=\left\{\begin{array}{lr}
\dfrac{x^\lambda-1}{\lambda} & \lambda\neq0\\
\ln(x) & \lambda=0
\end{array}\right.
$$

In [None]:
from scipy.stats import boxcox

In [None]:
# Load 'sesgo.csv' into sesgo and show its first two rows.
sesgo=pd.read_csv('sesgo.csv')
sesgo.head(2)

In [None]:
# Grid search: for every column of sesgo, apply Box-Cox with each candidate
# exponent and keep the one whose result has the skewness closest to zero.
LMB=np.linspace(-20,20,100) # candidate exponents
mejor_exp={}
for v in sesgo:
    x=sesgo[v]-sesgo[v].min()+1 # Shifted variable: minimum becomes 1 (Box-Cox needs positive data)
    # A Series already provides .skew(), so no one-column DataFrame is needed.
    sp=[pd.Series(boxcox(x,lmb)).skew() for lmb in LMB]
    # nanargmin ignores exponents whose skewness is NaN (e.g. after overflow);
    # np.argmin would report such a NaN entry as the minimum.
    # It raises ValueError if every candidate produced NaN.
    idx=np.nanargmin(np.abs(sp))
    mejor_exp[v]=(LMB[idx],sp[idx]) # (best exponent, best skewness)
mejor_exp

In [None]:
# Same per-column comparison, but with the exponent estimated by boxcox itself:
# called without a lambda it returns the transformed data and the fitted lambda.
for v in sesgo:
    x=1+(sesgo[v]-sesgo[v].min())
    xp,lmb=boxcox(x)
    mejor_exp[v]=(lmb,pd.DataFrame(xp).skew().iloc[0])
mejor_exp

In [None]:
# Summary statistics of the columns of sesgo.
sesgo.describe()

In [None]:
# (exponent, skewness) pair stored for column '-0.5'.
mejor_exp['-0.5']

In [None]:
# Box-Cox with the exponent estimated by scipy, applied to column '-0.5' as is
# (no shift to a minimum of 1, unlike the loops that fill mejor_exp).
# NOTE(review): scipy's boxcox requires strictly positive input - confirm this
# column has no values <= 0.
x,l=boxcox(sesgo['-0.5'])
l

In [None]:
# Box-Cox with the exponent estimated by scipy, applied to column '-1.3' as is
# (no shift to a minimum of 1, unlike the loops that fill mejor_exp).
# NOTE(review): scipy's boxcox requires strictly positive input - confirm this
# column has no values <= 0.
x,l=boxcox(sesgo['-1.3'])
l

In [None]:
# (exponent, skewness) pair stored for column '-1.3'.
mejor_exp['-1.3']

---
Uso de valores negativos para $x$:
$$
x'=\left\{
  \begin{array}{ll}
  \mathrm{sign}(x)\dfrac{(|x|+1)^\lambda-1}{\lambda},&\lambda\neq0\\
  \mathrm{sign}(x)\ln(|x|+1),&\lambda=0
  \end{array}.
  \right.
$$

In [None]:
x=np.linspace(-10,10,500) # 500 evenly spaced points in [-10, 10], negative values included
LMB=[-1,0.5,0,2,3,4] # exponents (lambda values) whose curves are compared

In [None]:
def potenciaS(x,lmb):
    """Sign-preserving power transform that also accepts negative ``x``.

    Parameters
    ----------
    x : scalar or array
        Values to transform; may be negative.
    lmb : scalar
        Exponent (lambda) of the transform.

    Returns
    -------
    ``sign(x) * ((|x| + 1)**lmb - 1) / lmb`` when ``lmb`` is non-zero, and
    ``sign(x) * log(|x| + 1)`` when ``lmb`` equals zero.
    """
    signo=np.sign(x)
    magnitud=abs(x)+1
    if lmb!=0:
        return signo*(magnitud**lmb-1)/lmb
    # lmb == 0: logarithmic case
    return signo*np.log(magnitud)

In [None]:
# One curve per exponent in LMB, drawn with the sign-preserving transform.
for lmb in LMB:
    xp=potenciaS(x,lmb)
    plt.plot(x,xp,label='exponente: '+str(lmb))
# Dashed black reference line: sign(x)*|x|.
plt.plot(x,np.sign(x)*np.abs(x),'--k')
plt.legend()
plt.ylim(-10,10)
plt.grid()

## Yeo-Johnson
$$
x'=\left\{
  \begin{array}{lr}
  \frac{(x+1)^{\lambda_1}-1}{\lambda_1} & \lambda_1\neq0,\quad x\geq0\\
  \ln(x+1) & \lambda_1=0,\quad x\geq0\\
  -\frac{(-x+1)^{\lambda_2}-1}{\lambda_2} & \lambda_2\neq0,\quad x<0\\
  -\ln(-x+1) & \lambda_2=0,\quad x<0.
  \end{array}
  \right.
$$
Donde $\lambda_1+\lambda_2=2$.

Se sustituye $\lambda_2=2-\lambda_1$ y se renombra $\lambda_1$ como $\lambda$:
$$
x'=\left\{
  \begin{array}{lr}
  \frac{(x+1)^{\lambda}-1}{\lambda} & \lambda\neq0,\quad x\geq0\\
  \ln(x+1) & \lambda=0,\quad x\geq0\\
  -\frac{(-x+1)^{2-\lambda}-1}{2-\lambda} & \lambda\neq2,\quad x<0\\
  -\ln(-x+1) & \lambda=2,\quad x<0.
  \end{array}
  \right.
$$

In [None]:
from scipy.stats import yeojohnson

In [None]:
# One curve per exponent in LMB, drawn with scipy's Yeo-Johnson transform.
for lmb in LMB:
    xp=yeojohnson(x,lmb)
    plt.plot(x,xp,label='exponente: '+str(lmb))
# Dashed black reference line: sign(x)*|x|.
plt.plot(x,np.sign(x)*np.abs(x),'--k')
plt.legend()
plt.ylim(-10,10)
plt.grid()

In [None]:
# Yeo-Johnson on the unshifted column '0'. With no lambda given, yeojohnson
# returns the transformed data together with the lambda it estimated (kept in l).
df_copia['0'],l=yeojohnson(df['0'])
df_copia['0'].hist(bins=50)
l,df_copia['0'].skew() # Resulting skewness without shifting = 0.04

In [None]:
# Yeo-Johnson on the unshifted column '1'. With no lambda given, yeojohnson
# returns the transformed data together with the lambda it estimated (kept in l).
df_copia['1'],l=yeojohnson(df['1'])
df_copia['1'].hist(bins=50)
l,df_copia['1'].skew() # estimated lambda and skewness of the transformed column

In [None]:
# Yeo-Johnson on the unshifted column '2'. With no lambda given, yeojohnson
# returns the transformed data together with the lambda it estimated (kept in l).
df_copia['2'],l=yeojohnson(df['2'])
df_copia['2'].hist(bins=50)
l,df_copia['2'].skew() # estimated lambda and skewness of the transformed column

---
## Ejemplo Regresión