authors: [Ali Ismail-Fawaz](https://hadifawaz1999.github.io/) and [Germain Forestier](https://germain-forestier.info/)

# Deep Learning: Introduction

## Import used packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Downloading the dataset

In [None]:
# Download the dataset CSV; a later cell reads it back as "pokemon-stats-data.csv".
# NOTE(review): re-running this cell while the file already exists may save a
# numbered duplicate instead of replacing it - confirm, and consider `wget -nc`.
!wget https://hadifawaz1999.github.io/teaching/Ensisa/2A-Info/DeepLearning/datasets/pokemon-stats-data.csv

## Reading the data

In [None]:
# Load the stats table and discard incomplete rows in one chained step.
# NOTE(review): dropna() is called without a subset, so a row is removed when
# ANY of its columns is missing, while the following cells only read
# "sp_attack" and "sp_defense" - confirm this is intended.
df = pd.read_csv("pokemon-stats-data.csv").dropna()

# Preview the first rows of the cleaned table.
df.head()

## Extract the features to be used

In [None]:
# Input feature (X) and regression target (Y), each a single column of df.
X, Y = df["sp_attack"], df["sp_defense"]

print(f"Number of samples in the dataset is {len(X)}")

## Split the dataset into train and test sets

In [None]:
# Hold out 33% of the samples for testing; the fixed random_state makes the
# split reproducible from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

print(f"Number of training examples is {len(xtrain)}")
print(f"Number of testing examples is {len(xtest)}")

## Plot the 2D attributes

In [None]:
# Scatter plot of the training pairs, drawn through an explicit Axes object
# instead of the implicit pyplot state.
# NOTE(review): the labels read "Attack"/"Defense" while the plotted columns
# are "sp_attack"/"sp_defense" - confirm the wording.
fig, ax = plt.subplots()
ax.scatter(xtrain, ytrain)
ax.set_xlabel("Attack (x)")
ax.set_ylabel("Defense (y)")
plt.show()
plt.close()

## Define your linear model as $ypred=w.x$

In [None]:
def linear_model(x, w):
  """Predict with a bias-free linear model.

  Args:
    x: Input value(s); anything that supports multiplication by ``w``.
    w: Slope (weight) applied to ``x``.

  Returns:
    The product ``w * x``.
  """
  return w * x

## Define your error function (cost function) as $average((y-ypred)^2)$

In [None]:
def error_function(y, ypred):
  """Return the mean squared error between targets and predictions.

  Args:
    y: Ground-truth values.
    ypred: Predicted values; subtracted from ``y`` before squaring.

  Returns:
    The mean of the squared differences ``(y - ypred) ** 2``.
  """
  residuals = y - ypred
  return np.square(residuals).mean()

## Try multiple values of $w$ and calculate the error function with respect to $w$

### Define the list of possible values of $w$ ranging from $-2.0$ to $8.0$

In [None]:
# Candidate slopes: 1000 evenly spaced values covering [-2.0, 8.0) with a
# spacing of 0.01 (the stop value 8.0 itself is excluded, as before).
# np.linspace is used rather than np.arange because arange with a non-integer
# step accumulates floating-point error along the grid and its length can be
# sensitive to rounding; linspace fixes the number of points explicitly.
w_s = np.linspace(start=-2.0, stop=8.0, num=1000, endpoint=False)
print(w_s)

### Define the array of errors (initialized to zeros), with one entry for each value in $w_s$

In [None]:
# One error slot per candidate w: same shape as w_s, zero-initialised,
# stored as float32.
list_errors = np.zeros_like(w_s, dtype=np.float32)

### Go through all values in $w_s$ and calculate the error

In [None]:
# Sweep the candidate slopes and record the training error of each one
# at the matching position of list_errors.
for i, w in enumerate(w_s):
  ypred = linear_model(xtrain, w)
  list_errors[i] = error_function(ytrain, ypred)

### Plot the error function with respect to the values of $w_s$

In [None]:
# Training error as a function of the candidate slope w.
fig, ax = plt.subplots()
ax.plot(w_s, list_errors)
ax.set_xlabel(r'$w$')
ax.set_ylabel("error")
plt.show()
plt.close()

### Find the $w$ that minimizes the error

In [None]:
# Position of the smallest training error on the grid, then the slope and
# the error value stored at that position.
index_w_best = np.argmin(list_errors)
w_best = w_s[index_w_best]
error_train_min = list_errors[index_w_best]

# "!s" forces str() on the numpy scalars so the printed text matches
# plain string concatenation.
print(f"For w = {w_best!s}, the error on all training examples is = {error_train_min!s}")

### Evaluate the linear model for $w=w_{best}$ on the test set

In [None]:
# Score the selected slope on the held-out test split.
ypred = linear_model(xtest, w_best)
error_test = error_function(ytest, ypred)

# "!s" forces str() on the values so the printed text matches plain
# string concatenation.
print(f"For w = {w_best!s}, the error on all testing examples is = {error_test!s}")

### Visualize the prediction with respect to the true values on the test set

In [None]:
# Extremes of the predictions and of the ground truth.
min_ypred, max_ypred = np.min(ypred), np.max(ypred)
min_ytest, max_ytest = np.min(ytest), np.max(ytest)

# A single shared interval is applied to both axes so that predictions and
# ground truth are shown over the same range.
axis_low = min(min_ypred, min_ytest)
axis_high = max(max_ypred, max_ytest)

fig, ax = plt.subplots()
ax.scatter(ytest, ypred)
ax.set_xlim(axis_low, axis_high)
ax.set_ylim(axis_low, axis_high)
ax.set_xlabel("Ground truth ytest")
ax.set_ylabel("Predictions ypred")
plt.show()
plt.close()

### Plot the straight line $y=w_{best} . x$

In [None]:
# The fitted line is drawn between the smallest and largest training inputs;
# its two endpoints are the model's predictions at those inputs.
x1, x2 = np.min(xtrain), np.max(xtrain)
y1, y2 = linear_model(x1, w_best), linear_model(x2, w_best)

# Training points with the line y = w_best * x overlaid in red.
fig, ax = plt.subplots()
ax.scatter(xtrain, ytrain)
ax.plot([x1, x2], [y1, y2], lw=3, color='red')
ax.set_xlabel("Attack (x)")
ax.set_ylabel("Defense (y)")
plt.show()
plt.close()

# Exercise

## How can we improve the precision (reduce the error) on the test set?

In [None]:
# your code here