In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
%cd /content/drive/MyDrive/tcb
!ls

/content/drive/MyDrive/tcb
data.csv  ex1  ex2  ex3  linear.ipynb


In [7]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [8]:
data = pd.read_csv('data.csv')

In [9]:
data

Unnamed: 0,Movie,Release Date,Critic 1,Critic 2,Critic 3,Critic 4,Critic 5,Audience Rating
0,Iron Man,2-May-08,4.0,4.0,7.0,6.0,5.0,9.1
1,The Incredible Hulk,13-Jun-08,,,6.0,3.0,5.0,7.0
2,Iron Man 2,7-May-10,3.0,6.0,8.0,6.0,5.0,7.1
3,Thor,6-May-11,4.0,6.0,8.0,5.0,,7.6
4,Captain America: The First Avenger,22-Jul-11,5.0,5.0,,7.0,7.0,7.5
5,Marvel's The Avengers,4-May-12,3.0,8.0,8.0,,9.0,9.1
6,Iron Man 3,3-May-13,3.0,,8.0,,5.0,7.8
7,Thor: The Dark World,8-Nov-13,3.0,3.0,9.0,5.0,5.0,7.5
8,Captain America: The Winter Soldier,4-Apr-14,7.0,7.0,9.0,9.0,,9.2
9,Guardians of the Galaxy,1-Aug-14,5.0,6.0,,9.0,8.0,9.2


In [10]:
data.describe()

Unnamed: 0,Critic 1,Critic 2,Critic 3,Critic 4,Critic 5,Audience Rating
count,16.0,18.0,19.0,17.0,15.0,22.0
mean,4.75,6.666667,7.578947,6.882353,6.866667,8.186364
std,1.612452,2.057983,1.346427,1.964763,2.099887,1.080254
min,3.0,3.0,5.0,3.0,4.0,4.5
25%,3.0,5.25,6.5,5.0,5.0,7.65
50%,4.5,6.5,8.0,7.0,7.0,8.55
75%,6.25,8.0,8.5,9.0,8.5,8.975
max,7.0,10.0,10.0,10.0,10.0,9.2


In [11]:
# Drop cột ko liên quan
data = data.drop(columns = ['Movie', 'Release Date'])

In [12]:
train, test = train_test_split(data, test_size = 0.2, random_state = 42)

In [13]:
for column in train.columns:
    col_mode = train[column].mean()
    train[column].fillna(col_mode, inplace = True)

for column in test.columns:
    col_mode = test[column].mean()
    test[column].fillna(col_mode, inplace = True)

In [14]:
train

Unnamed: 0,Critic 1,Critic 2,Critic 3,Critic 4,Critic 5,Audience Rating
5,3.0,8.0,8.0,7.0,9.0,9.1
20,5.0,6.714286,9.0,5.0,7.272727,4.5
11,3.0,5.0,6.0,7.0,4.0,8.5
3,4.0,6.0,8.0,5.0,7.272727,7.6
4,5.0,5.0,7.6,7.0,7.0,7.5
17,4.692308,7.0,6.0,6.0,8.0,7.9
12,7.0,6.714286,8.0,9.0,7.272727,8.9
18,7.0,7.0,9.0,8.0,9.0,9.1
16,4.692308,10.0,5.0,9.0,7.272727,8.7
2,3.0,6.0,8.0,6.0,5.0,7.1


In [15]:
test

Unnamed: 0,Critic 1,Critic 2,Critic 3,Critic 4,Critic 5,Audience Rating
0,4.0,4.0,7.0,6.0,5.0,9.1
13,4.0,6.0,8.0,6.5,5.0,8.6
8,7.0,7.0,9.0,9.0,5.75,9.2
1,5.0,6.5,6.0,3.0,5.0,7.0
15,5.0,9.0,7.5,8.0,8.0,8.7


In [16]:
# Mũ lên
poly = PolynomialFeatures(degree = 5)

# Tạo df mới(ko có cột target)
train_new = train.drop(columns = ['Audience Rating'])
test_new = test.drop(columns = ['Audience Rating'])

# Transform df
train_new = poly.fit_transform(train_new)
test_new = poly.transform(test_new)
print('Polynomial Features shape: ', train_new.shape)

Polynomial Features shape:  (17, 252)


In [17]:
poly.get_feature_names(input_features = ['Critic 1',	'Critic 2', 'Critic 3',	'Critic 4', 'Critic 5'])[:50]



['1',
 'Critic 1',
 'Critic 2',
 'Critic 3',
 'Critic 4',
 'Critic 5',
 'Critic 1^2',
 'Critic 1 Critic 2',
 'Critic 1 Critic 3',
 'Critic 1 Critic 4',
 'Critic 1 Critic 5',
 'Critic 2^2',
 'Critic 2 Critic 3',
 'Critic 2 Critic 4',
 'Critic 2 Critic 5',
 'Critic 3^2',
 'Critic 3 Critic 4',
 'Critic 3 Critic 5',
 'Critic 4^2',
 'Critic 4 Critic 5',
 'Critic 5^2',
 'Critic 1^3',
 'Critic 1^2 Critic 2',
 'Critic 1^2 Critic 3',
 'Critic 1^2 Critic 4',
 'Critic 1^2 Critic 5',
 'Critic 1 Critic 2^2',
 'Critic 1 Critic 2 Critic 3',
 'Critic 1 Critic 2 Critic 4',
 'Critic 1 Critic 2 Critic 5',
 'Critic 1 Critic 3^2',
 'Critic 1 Critic 3 Critic 4',
 'Critic 1 Critic 3 Critic 5',
 'Critic 1 Critic 4^2',
 'Critic 1 Critic 4 Critic 5',
 'Critic 1 Critic 5^2',
 'Critic 2^3',
 'Critic 2^2 Critic 3',
 'Critic 2^2 Critic 4',
 'Critic 2^2 Critic 5',
 'Critic 2 Critic 3^2',
 'Critic 2 Critic 3 Critic 4',
 'Critic 2 Critic 3 Critic 5',
 'Critic 2 Critic 4^2',
 'Critic 2 Critic 4 Critic 5',
 'Critic 2 Cr

In [18]:
poly_train = pd.DataFrame(train_new, 
                             columns = poly.get_feature_names(['Critic 1',	'Critic 2', 'Critic 3',	'Critic 4', 'Critic 5']))

poly_train['TARGET'] = train['Audience Rating']

# Tính correlation
poly_corrs = poly_train.corr()['TARGET'].sort_values()

print(poly_corrs.head(10))
print(poly_corrs.tail(5))

Critic 1 Critic 4              -0.272666
Critic 1^3 Critic 4^2          -0.269221
Critic 1^3 Critic 3 Critic 4   -0.265536
Critic 1^2 Critic 4            -0.264747
Critic 1^4 Critic 4            -0.262810
Critic 1^3 Critic 4            -0.262611
Critic 4                       -0.261710
Critic 1^2 Critic 4^2          -0.260854
Critic 1^2 Critic 3 Critic 4   -0.254859
Critic 1 Critic 4^2            -0.252155
Name: TARGET, dtype: float64
Critic 2^3    0.258534
Critic 2^5    0.263435
Critic 2^4    0.264910
TARGET        1.000000
1                  NaN
Name: TARGET, dtype: float64




In [19]:
poly_train

Unnamed: 0,1,Critic 1,Critic 2,Critic 3,Critic 4,Critic 5,Critic 1^2,Critic 1 Critic 2,Critic 1 Critic 3,Critic 1 Critic 4,...,Critic 3 Critic 4^2 Critic 5^2,Critic 3 Critic 4 Critic 5^3,Critic 3 Critic 5^4,Critic 4^5,Critic 4^4 Critic 5,Critic 4^3 Critic 5^2,Critic 4^2 Critic 5^3,Critic 4 Critic 5^4,Critic 5^5,TARGET
0,1.0,3.0,8.0,8.0,7.0,9.0,9.0,24.0,24.0,21.0,...,31752.0,40824.0,52488.0,16807.0,21609.0,27783.0,35721.0,45927.0,59049.0,
1,1.0,5.0,6.714286,9.0,5.0,7.272727,25.0,33.571429,45.0,25.0,...,11900.826446,17310.293013,25178.608019,3125.0,4545.454545,6611.570248,9616.829452,13988.115566,20346.349914,
2,1.0,3.0,5.0,6.0,7.0,4.0,9.0,15.0,18.0,21.0,...,4704.0,2688.0,1536.0,16807.0,9604.0,5488.0,3136.0,1792.0,1024.0,7.1
3,1.0,4.0,6.0,8.0,5.0,7.272727,16.0,24.0,32.0,20.0,...,10578.512397,15386.927122,22380.984905,3125.0,4545.454545,6611.570248,9616.829452,13988.115566,20346.349914,7.6
4,1.0,5.0,5.0,7.6,7.0,7.0,25.0,25.0,38.0,35.0,...,18247.6,18247.6,18247.6,16807.0,16807.0,16807.0,16807.0,16807.0,16807.0,7.5
5,1.0,4.692308,7.0,6.0,6.0,8.0,22.017751,32.846154,28.153846,28.153846,...,13824.0,18432.0,24576.0,7776.0,10368.0,13824.0,18432.0,24576.0,32768.0,9.1
6,1.0,7.0,6.714286,8.0,9.0,7.272727,49.0,47.0,56.0,63.0,...,34274.380165,27696.46882,22380.984905,59049.0,47716.363636,38558.677686,31158.527423,25178.608019,20346.349914,7.8
7,1.0,7.0,7.0,9.0,8.0,9.0,49.0,49.0,63.0,56.0,...,46656.0,52488.0,59049.0,32768.0,36864.0,41472.0,46656.0,52488.0,59049.0,7.5
8,1.0,4.692308,10.0,5.0,9.0,7.272727,22.017751,46.923077,23.461538,42.230769,...,21421.487603,17310.293013,13988.115566,59049.0,47716.363636,38558.677686,31158.527423,25178.608019,20346.349914,
9,1.0,3.0,6.0,8.0,6.0,5.0,9.0,18.0,24.0,18.0,...,7200.0,6000.0,5000.0,7776.0,6480.0,5400.0,4500.0,3750.0,3125.0,9.2
