<center>
<img src="caratula.png" alt="drawing" width="1200"/>
</center>

<br><br><br><br>

# Regresión Lineal

In [None]:
from IPython.display import IFrame
# Embed the interactive GeoGebra applet for this section as the cell output.
IFrame(src='https://www.geogebra.org/classic/rhukgajc?embed', width=1400, height=600)

<br><br>

## Función

$$ \Large y = f(\textbf{X}, \theta):$$

$$ \Large \textbf{X}\in\mathbb{R}^n,\,\, y\in\mathbb{R} ,  $$

$$ \Large \theta    \in\mathbb{R}^k. $$ 

## Minimizar el error cuadrático medio

$$ \Large \mathcal{L}(\textbf{X},y,\theta)=\min_{\theta} \sum_{i=1}^M ( \theta_1 x_i + \theta_0 - y_i )^2$$

## Pseudoinversa

$$ \Large \theta=\left( \hat{\textbf{X}}^T\hat{\textbf{X}} \right)^{-1}\hat{\textbf{X}}^Ty$$

### donde
$$ \Large \hat{\textbf{X}} = 
    \begin{pmatrix}
      x_{1,1}& x_{1,2} & \cdots & x_{1,N}& 1\\
      x_{2,1}& x_{2,2} & \cdots & x_{2,N}& 1\\
      \vdots &         & \ddots &        & \vdots\\
      x_{M,1}& x_{M,2} & \cdots & x_{M,N}& 1\\
    \end{pmatrix} $$

## Regresión Lineal $\rightarrow$ Clasificador

In [None]:
# Embed the interactive GeoGebra applet for this section as the cell output.
IFrame(src='https://www.geogebra.org/classic/wqhdtz6v?embed', width=1400, height=600)

# Decisión

In [None]:
# Embed the interactive GeoGebra applet for this section as the cell output.
IFrame(src='https://www.geogebra.org/classic/ertr6snf?embed', width=1400, height=600)

# Umbral óptimo

In [None]:
# Embed the interactive GeoGebra applet for this section as the cell output.
IFrame(src='https://www.geogebra.org/classic/aj8xcpxa?embed', width=1400, height=600)

In [None]:
import pdb

import numpy   as np
import pandas  as pd
import seaborn as sns; sns.set(style="ticks", palette="pastel")

from matplotlib           import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Notebook-wide matplotlib defaults, applied once so every later figure shares them.
plt.style.use({
    # --- figure ---
    'figure.figsize'    : (12, 4),
    # --- axes ---
    'axes.grid'         : True,
    'axes.labelsize'    : 16,
    'axes.titlesize'    : 20,
    'axes.titleweight'  : True,   # NOTE(review): boolean given for a weight setting - confirm intent
    # --- ticks ---
    'xtick.labelsize'   : 14,
    'ytick.labelsize'   : 14,
    'xtick.major.width' : True,   # NOTE(review): boolean given for a width setting - confirm intent
    'ytick.major.width' : True,   # NOTE(review): boolean given for a width setting - confirm intent
    # --- lines / markers ---
    'lines.linewidth'   : 2.5,
    'lines.markersize'  : 14,
})

def pseudo_inv (X: np.ndarray, Y: np.ndarray)->np.ndarray:
    """
      Least-squares weights for regression/classification via the
      Moore-Penrose pseudoinverse
        -> W = pinv(X) Y
           (equal to (X^T X)^-1 X^T Y whenever X has full column rank)

      The pseudoinverse is obtained with np.linalg.pinv instead of explicitly
      inverting X^T X: the explicit inverse fails on rank-deficient designs
      (e.g. duplicated feature columns) and loses precision on nearly
      collinear ones. For a rank-deficient X the minimum-norm least-squares
      solution is returned.

      @param X: -> shape = (n_samples,k_features)
      @param Y: -> shape = (n_samples,1)

      @return W: -> shape = (k_features,1)
    """
    return np.linalg.pinv(X) @ Y

def extend_x(X: np.ndarray)->np.ndarray:
    """
      Appends a column of ones to the X matrix (bias / intercept term)
        -> X_ex = [X Ones]

      @param X: -> shape = (n_samples,k_features)

      @return X_ex: -> shape = (n_samples,k_features+1), a newly allocated
                       array; the ones are floats, so an integer X is
                       promoted to float
    """
    ones = np.ones( (X.shape[0],1) )

    # np.concatenate always allocates a fresh array, so no extra copy is needed
    return np.concatenate( (X,ones), axis=-1 )

# Ejemplo Toy

In [None]:
# Toy dataset: column 0 holds the single feature, column 1 a 0/1 value.
data = np.array([
    [1.9, 1],
    [0.7, 1],
    [2.0, 0],
    [2.3, 0],
    [3.0, 0],
    [2.1, 1],
    [1.8, 0],
    [1.1, 1],
])

# Rows whose second column equals 1 are drawn dark blue, the rest orange.
mask = data[:,1]==1
for selector, colour in ( (mask,'darkblue'), (~mask,'orange') ):
    plt.plot( data[selector,0], data[selector,1], 'o', color=colour )
plt.show()

In [None]:
# Keep both slices two-dimensional (n_samples,1), as pseudo_inv/extend_x expect.
X_Train = data[:,0:1]
Y_Train = data[:,1:2]

# Fit on the bias-extended feature matrix; the bare name shows the weights as cell output.
W = pseudo_inv( extend_x(X_Train), Y_Train )
W

In [None]:
# Fitted line evaluated at the training points.
y = extend_x( X_Train ) @ W
plt.plot( X_Train,y )

# Use the same colour coding as the raw-data plot of this toy set:
# rows with label 1 in dark blue, rows with label 0 in orange.
mask = Y_Train == 1
plt.plot( X_Train[ mask],Y_Train[ mask],'o',color='darkblue' )
plt.plot( X_Train[~mask],Y_Train[~mask],'o',color='orange' )
plt.show()

# Ejemplo 1D

In [None]:
import os

# Current working directory with a trailing slash, so that file names can be
# appended to it by plain string concatenation.
PATH   = os.path.abspath('./') + '/'

# Notebook parameters; file names are looked up by key in later cells.
params = {'fname_uninormals': 'CNIB 2020 TWO UNIV NORMALS 2.csv'}

In [None]:
# Read the CSV into a DataFrame.
data = pd.read_csv( PATH + params['fname_uninormals'] )

# Plot: every row is placed on the horizontal line y = 0 and coloured by column 'Y'.
y = np.zeros( len(data) )
sns.scatterplot( data=data, x='X', y=y, hue='Y',
                 palette=['forestgreen','darkblue'],
                 s=400,
                 alpha=.8 )
plt.show()

In [None]:
#Importante
def data_split( data,N,seed=None ):
    """
      Randomly split the rows of a DataFrame into two disjoint subsets.

      @param data: pandas DataFrame (accessed through .shape and .iloc)
      @param N:    number of rows placed in the first subset,
                   0 <= N <= number of rows
      @param seed: optional int. When given, the shuffle uses its own
                   generator seeded with this value, so the split is
                   reproducible. When None (default) the global NumPy
                   random state is used, exactly as before.

      @return (Train, Test): Train has N rows, Test has the remaining rows

      @raise ValueError: if N is negative or larger than the number of rows
    """
    n_rows = data.shape[0]
    if not 0 <= N <= n_rows:
        raise ValueError(
            "N must be between 0 and {} (got {})".format(n_rows, N) )

    index = np.arange( n_rows )
    if seed is None:
        np.random.shuffle(index)
    else:
        np.random.RandomState(seed).shuffle(index)

    Train = data.iloc[index[:N]]
    # Slice from N onwards: the previous index[-P:] form returned every row
    # when P == 0 (N == n_rows), because -0 == 0.
    Test  = data.iloc[index[N:]]

    return Train, Test

In [None]:
N     = 30   # number of rows placed in the training subset

# data_split shuffles with NumPy's global random state; seeding it here makes
# the split (and every result derived from it) identical on each full re-run.
np.random.seed(42)

Train, Test = data_split( data,N )

In [None]:
# Overlay both subsets on the same axes: Train with large markers,
# Test with smaller, more transparent ones and a different palette.
for subset, colours, size, opacity in (
        (Train, ['forestgreen','darkblue'], 400, .8),
        (Test,  ['gold','mediumpurple'],    200, .6) ):
    sns.scatterplot( x='X', y='Y', hue='Y', data=subset,
                     palette=colours,
                     s=size,
                     alpha=opacity )
plt.show()

In [None]:
# First ten values of column 'Y' in the training subset (positional slice).
Train['Y'].iloc[:10]

In [None]:
# Column vector of the feature 'X', extended with the column of ones.
X_Train = extend_x( Train.X.values.reshape(-1,1) )
X_Train[:10]

In [None]:
# Column 'Y' of the training subset as a column vector (n_samples,1).
Y_Train = Train.Y.values.reshape(-1,1)
Y_Train[:10]

In [None]:
# Fit the weights on the training subset; the bare name shows them as cell output.
W = pseudo_inv( X=X_Train, Y=Y_Train )
W

In [None]:
# Same preparation as for the training subset: bias-extended features and column-vector 'Y'.
X_Test = extend_x( Test.X.values.reshape(-1,1) )
Y_Test = Test.Y.values.reshape(-1,1)

In [None]:
# Raw (continuous) regression output for every test row.
L = np.dot( X_Test, W )
L

In [None]:
# Threshold the regression output at 0: strictly positive -> +1, otherwise -1.
# NOTE(review): this presumes the 'Y' column is coded as -1/+1 - confirm against the CSV.
L = np.where( L > 0., 1, -1 )

In [None]:
def compare(Y, L, ticks=True):
    """
      Compare two sequences of rows element by element.

      When `ticks` is true, one line is printed per pair with the first
      element of each row and a check mark if they are equal, a cross
      otherwise. Regardless of `ticks`, the element-wise result of `L==Y`
      is drawn with plt.pcolormesh.

      @param Y:     first sequence; each item is indexed with [0]
                    (presumably an (n_samples,1) array - verify against caller)
      @param L:     second sequence, paired with Y item by item
      @param ticks: print the per-pair lines when true
    """
    if ticks:
        for expected, predicted in zip(Y, L):
            first, second = expected[0], predicted[0]
            mark = u"✓" if first == second else u"✘"
            print( "{:4.1f}, {:4.1f}, {}".format(first, second, mark) )

    plt.pcolormesh(L==Y,cmap='cool')

In [None]:
# Print the per-sample comparison and draw the match map.
compare( Y_Test, L, ticks=True )

# Ejemplo 2D $\rightarrow$ ver script 

In [None]:
params['fname_binormals'] = 'CNIB 2020 TWO BIV NORMALS.csv'

# Read the CSV and cast column 'Y' to integer in a single chained expression.
data = (
    pd.read_csv( PATH + params['fname_binormals'] )
      .astype( {'Y': 'int'} )
)

In [None]:
#Graficar
sns.scatterplot(x='X_1', y='X_2',hue='Y',data=data)
sum(data.Y<1)

In [None]:
N     = 300   # number of rows placed in the training subset

# data_split shuffles with NumPy's global random state; seeding it here makes
# the split (and every result derived from it) identical on each full re-run.
np.random.seed(42)

Train, Test = data_split( data,N )

In [None]:
# Two-column feature matrix and column-vector 'Y' of the training subset.
X_Train = Train[['X_1','X_2']].values
Y_Train = Train.Y.values.reshape(-1,1)

# Fit on the bias-extended features; the bare name shows the weights as cell output.
W = pseudo_inv( extend_x(X_Train), Y_Train )
W

In [None]:
# Test subset prepared like the training one (here X_Test already carries the ones column).
X_Test = extend_x( Test[['X_1','X_2']].values )
Y_Test = Test.Y.values.reshape(-1,1)

In [None]:
# Regression output thresholded at 0: strictly positive -> +1, otherwise -1.
L = np.where( X_Test @ W > 0, 1, -1 )

# Only draw the match map; the per-sample printout is switched off.
compare( Y_Test, L, ticks=False )

# Aplicaciones

In [None]:
# Embed the YouTube video for this section as the cell output.
IFrame(src='https://www.youtube.com/embed/nW5AOUbvmFE', width=1400, height=600)

<br><br><br>
# Scenario Screen
<center>
<img src="scenario_screen.png" alt="drawing" width="800"/>
</center>

<br><br><br><br><br>

# ¿Hay otros clasificadores basados en la regresión lineal?

## Least Absolute Shrinkage and Selection Operator (LASSO):
$$\Large \mathcal{L}(\textbf{X},y,\theta)=\min_{\theta} \left( \sum_{i=1}^M ( \theta^T x_i + \theta_0 - y_i )^2 +  \lambda\sum_{j=1}^N \left|\theta_j\right| \right)$$

## Ridge Regression:
$$\Large \mathcal{L}(\textbf{X},y,\theta)=\min_{\theta} \left( \sum_{i=1}^M ( \theta^T x_i + \theta_0 - y_i )^2 +  \lambda\sum_{j=1}^N \left(\theta_j\right)^2 \right)$$

<br><br><br><br><br><br>
<center>
<img src="caratula.png" alt="drawing" width="800"/>
</center>

# [https://www.researchgate.net/profile/Omar_Pina-Ramirez](https://www.researchgate.net/profile/Omar_Pina-Ramirez)
# [https://github.com/delozath/](https://github.com/delozath/)