In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook

In [None]:
!ls ../input/allgrades

In [None]:
# Load the grades workbook into a DataFrame.
data = pd.read_excel('../input/allgrades/AllGrades.xlsx')

In [None]:
# Preview the first rows to check the column names used in the cells below.
data.head()

In [None]:
# Distinct class labels in sorted order; a label's position in this list is
# later used as its integer code.
set_class = sorted(set(data['Class']))
set_class

In [None]:
# Replace every class label with its position in set_class.
# NOTE: not idempotent - re-running this cell once the column already holds
# integers raises ValueError, because the integers are not in set_class.
data['Class'] = [set_class.index(label) for label in data['Class']]
data.head()

In [None]:
# Regression target: the final-exam score column.
y = data['Final Exam']

In [None]:
# 3-D scatter of two test scores together with the final-exam score.
x_1 = data['Test1']
x_2 = data['Test2']

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# scatter takes (x, y, z): Test1 on x, Final Exam on y, Test2 on z.
ax.scatter(x_1, y, x_2, c='red')
# Label the axes so the figure can be read without looking at the code.
ax.set_xlabel('Test1')
ax.set_ylabel('Final Exam')
ax.set_zlabel('Test2')
plt.show()

Loss function : MSE $$l(W) = \frac{1}{2m}\sum_{i=1}^m\left(y_{true}^{(i)} - y_{predict}^{(i)}\right)^2 = \frac{1}{2m} \| y_{true} - y_{predict} \|_2^2$$

Loss function với bài toán linear regression : $$l(W) = \frac{1}{2m}\sum_{i=1}^m\left(y_{true}^{(i)} - X_{[i]} W\right)^2 = \frac{1}{2m} \| y_{true} - XW \|_2^2$$

In [None]:
def loss_function(y_true,y_predict):
    """Return the summed half squared error between targets and predictions.

    Computes ``sum(0.5 * (y_predict - y_true) ** 2)``. The result is a sum,
    not a mean: divide by the number of samples to get a per-sample value.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_predict : array-like
        Predicted values, same length and order as ``y_true``.

    Returns
    -------
    numpy.float64
        Total loss over all samples (``0.0`` for empty input).
    """
    # Convert to plain arrays so lists are accepted and values are paired by
    # position rather than by pandas index label.
    residual = np.asarray(y_predict, dtype=float) - np.asarray(y_true, dtype=float)
    # Vectorised sum, computed once.
    return np.sum(0.5 * residual ** 2)

Tìm bộ weights : $W$ với phương trình đơn biến 

$$f(x_{test_3}) = y_{final\ exam} = w_{1\ test_3} * x_{test_3} + w_{0\ test_3}$$

Đạo hàm của loss function : $$ \frac{\partial l(w)}{\partial w}=\frac{1}{m}X^T(Xw-y) $$
Đạo hàm = 0 tương đương với : $$ X^TXw=X^Ty $$
Khi này, với $A = X^TX$ và $b = X^Ty$, ta tính được $$ w=A^{\dagger}b=(X^TX)^{\dagger}X^Ty $$

In [None]:

# Closed-form least squares for Final Exam ~ Test3.
X = data[['Test3']]
one = np.ones((X.shape[0], 1))
# Prepend a column of ones so w[0] is the intercept and w[1] the Test3 slope.
Xbar = np.concatenate((one, X), axis = 1)
# Normal equations (Xbar^T Xbar) w = Xbar^T y, solved with the pseudo-inverse.
A = np.dot(Xbar.T, Xbar)
b = np.dot(Xbar.T, y)
w = np.dot(np.linalg.pinv(A), b)
print('w = ', w)

# Plot on a fresh figure: with `%matplotlib notebook` a bare plt.plot() would
# draw into the still-active 3-D figure created by the earlier scatter cell.
fig, ax = plt.subplots()
# Two end points are enough to draw the fitted straight line.
x0 = np.linspace(0, 10, 2)
y0 = w[0] + w[1]*x0
ax.plot(X, y, 'ro')
ax.plot(x0, y0)
ax.set_xlabel('Test3')
ax.set_ylabel('Final Exam')
plt.show()

Predict using a single feature

In [None]:
# Fitted line evaluated at every row's Test3 score.
predict = data['Test3'] * w[1] + w[0]
predict.head()

Loss with a single feature

In [None]:
# Summed half squared error of the current predictions over the whole dataset.
total_loss = loss_function(y_true=data['Final Exam'], y_predict=predict)
print('Total loss : {}'.format(total_loss))

## 2 Features

In [None]:
# Closed-form least squares for Final Exam ~ Test3 + Test2.
X = data[['Test3','Test2']]
one = np.ones((X.shape[0], 1))
# Column order of Xbar is [1, Test3, Test2], so w = [intercept, w_Test3, w_Test2].
Xbar = np.concatenate((one, X), axis = 1)
A = np.dot(Xbar.T, Xbar)
b = np.dot(Xbar.T, data['Final Exam'])
w = np.dot(np.linalg.pinv(A), b) #pseudo inverse
print('w = ', w)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Observed points on the same axes as the fitted plane:
# x = Test2, y = Test3, z = Final Exam.
ax.scatter(X['Test2'], X['Test3'], data['Final Exam'], c='red')
# Fitted plane: w[0] is the intercept, w[1] multiplies Test3, w[2] multiplies Test2.
Z = w[0] + w[1] * data['Test3'] + w[2] * data['Test2']
ax.plot_trisurf(X['Test2'], X['Test3'], Z, alpha=0.2)
ax.set_xlabel('Test2')
ax.set_ylabel('Test3')
ax.set_zlabel('Final Exam')

plt.show()

In [None]:
# Predictions from the two-feature fit (w = [intercept, w_Test3, w_Test2]).
predict = data['Test3'] * w[1] + w[0] + data['Test2'] * w[2]
predict.head()

In [None]:
# Summed half squared error of the current predictions over the whole dataset.
total_loss = loss_function(y_true=data['Final Exam'], y_predict=predict)
print('Total loss : {}'.format(total_loss))

## All features

In [None]:
# Closed-form least squares on every predictor column.
# The target column ('Final Exam') must not be part of X: including it leaks
# the answer into the features and produces a trivial, near-zero-loss fit.
X = data[['Class','Assignment1','Assignment2','Assignment3','Test1','Test2','Test3']]
one = np.ones((X.shape[0], 1))
# Leading column of ones makes w[0] the intercept; w[1:] follow X's column order.
Xbar = np.concatenate((one, X), axis = 1)
A = np.dot(Xbar.T, Xbar)
b = np.dot(Xbar.T, y)
w = np.dot(np.linalg.pinv(A), b)
print('w = ', w)

In [None]:
# Intercept plus the weighted sum of each row's feature values
# (w[1:] is matched to X's columns by position).
predict = X.mul(w[1:], axis='columns').sum(axis=1) + w[0]
total_loss = loss_function(y_true=data['Final Exam'], y_predict=predict)
print('Total loss : {}'.format(total_loss))

# Implement with keras

In [None]:
import keras
import tensorflow as tf

In [None]:
# Two stacked dense layers: 7 input features -> 64 units (linear activation) -> 1 output.
model = keras.Sequential()
model.add(keras.layers.Dense(64, activation='linear', input_shape=[7]))
model.add(keras.layers.Dense(1))
 

In [None]:
def K_loss_function(y_true,y_predict):
    """Element-wise absolute error |y_true - y_predict|, used as the Keras loss."""
    residual = y_true - y_predict
    return keras.backend.abs(residual)

# NOTE(review): training minimises absolute error, while the notebook reports
# the squared-error loss_function afterwards - confirm the mismatch is intended.
model.compile(optimizer='sgd',
              loss=K_loss_function,
              metrics=['mean_absolute_error'])

In [None]:
# Train on the seven predictor columns (the count must match input_shape=[7]).
X = data[['Class','Assignment1','Assignment2','Assignment3','Test1','Test2','Test3']]
Y = data['Final Exam']
model.fit(x=X, y=Y, epochs=15)

In [None]:
# Column 0 of the prediction array is passed on as the 1-D vector of predictions.
predict = model.predict(X)
total_loss = loss_function(y_true=data['Final Exam'], y_predict=predict[:, 0])
print('Total loss : {}'.format(total_loss))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Read the same workbook again into a separate frame for the scikit-learn workflow below.
allgrades = pd.read_excel('../input/allgrades/AllGrades.xlsx')

In [None]:
# Preview the freshly loaded rows.
allgrades.head()

In [None]:
# Print a summary of the frame's columns and dtypes.
allgrades.info()

In [None]:
# Per-column mean over the class 'A' rows. numeric_only=True skips the text
# 'Class' column, which pandas >= 2.0 would otherwise refuse to average (TypeError).
allgrades.loc[allgrades['Class'] == 'A'].mean(numeric_only=True)

In [None]:
# Per-column maximum over the class 'A' rows.
allgrades.loc[allgrades['Class'] == 'A'].max()

In [None]:
# Class 'A' rows whose final-exam score equals 2.2 (exact floating-point comparison).
allgrades.loc[(allgrades['Class'] == 'A') & (allgrades['Final Exam'] == 2.2)]

In [None]:
# Bar chart of final-exam score per class (aggregated with barplot's default estimator).
sns.barplot(data=allgrades, x='Class', y='Final Exam')

In [None]:
# Map each class label to an integer code, in this hand-picked order.
# NOTE(review): the order (B, C, G, E, A, D) is not alphabetical and the notebook
# does not say what it is based on - confirm before treating the codes as ordinal.
mapping_dict = {
    label: code
    for code, label in enumerate(['B', 'C', 'G', 'E', 'A', 'D'])
}

In [None]:
# Translate class labels to integer codes; labels missing from mapping_dict become NaN.
int_class = allgrades['Class'].map(mapping_dict)

In [None]:
# Overwrite the 'Class' column in place with the mapped codes.
allgrades['Class'] = int_class

In [None]:
# Check that 'Class' now shows the mapped codes.
allgrades.head()

In [None]:
# build model: scikit-learn LinearRegression constructed with its default arguments
my_model = LinearRegression()

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
y = allgrades['Final Exam']
X = allgrades.drop(columns='Final Exam')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1234
)

In [None]:
# training: fit the model on the training split only
my_model.fit(X_train, y_train)

In [None]:
# Evaluation: mean squared error on the training split and on the held-out test split.
y_train_pred = my_model.predict(X_train)
y_test_pred = my_model.predict(X_test)
print('Training MSE: ', mean_squared_error(y_train, y_train_pred))
print('Testing MSE: ', mean_squared_error(y_test, y_test_pred))

In [None]:
# NOTE(review): 1.75 looks like an MSE value copied by hand from an earlier output,
# which would make this its square root (RMSE) - confirm, or compute it from the MSE directly.
1.75**0.5

In [None]:
# Cross-check against the training MSE: loss_function returns sum(0.5 * err**2),
# so MSE = 2 * loss / n, where n is the number of TRAINING rows. Dividing by
# len(allgrades) would under-report it by the training fraction.
print('Training Loss BinhNA: ', 2*loss_function(y_train, y_train_pred)/len(y_train))