In [2]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
%matplotlib inline
# Configure seaborn in ONE call. Previously `sns.set_context("paper")` ran first and
# was then undone by `sns.set(...)`, which re-applies its default context ("notebook").
# Passing context/style/rc together keeps the intended "paper" scaling, the
# whitegrid style and the figure (canvas) size.
sns.set(context="paper", style="whitegrid", rc={'figure.figsize': (10, 8)})
# Show every column and up to 500 rows when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)

In [5]:
# Read the DIY table from its CSV file into a DataFrame.
DIY_path = './data/DIY_df.csv'
DIY = pd.read_csv(filepath_or_buffer=DIY_path, encoding='utf-8')
# One-off export of the table as a NumPy array file (left disabled):
# np.save('./data/DIY.npy', DIY)

In [6]:
# Show the loaded DataFrame as this cell's output.
DIY

Unnamed: 0,PRNT_EDU,STDY_EVNRNMNT,LF_QLTY,PRNT_CR,EX_RPT,STY,PEER,TCHR,SKIP,CLS1,CLS2,CLS3,KNLDG,REPEAT
0,0.846154,0.142857,0.358974,1.000000,0.50,0.421053,0.166667,0.555556,0.333333,0.10,0.555556,0.279412,0.206739,1.0
1,0.730769,0.428571,0.380342,1.000000,0.50,0.473684,0.500000,1.000000,0.222222,0.00,0.000000,0.382353,0.262654,1.0
2,0.884615,0.285714,0.564103,0.666667,0.50,0.368421,0.388889,0.333333,0.333333,1.00,1.000000,1.000000,0.229774,1.0
3,0.769231,0.428571,0.491453,0.416667,0.25,0.263158,0.611111,0.500000,0.111111,0.35,0.555556,0.235294,0.411814,1.0
4,0.769231,0.142857,0.465812,0.500000,0.25,0.815789,1.000000,0.166667,0.222222,0.75,0.500000,0.235294,0.450977,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32125,0.769231,0.142857,0.529915,0.666667,0.25,0.289474,0.222222,0.388889,0.000000,1.00,1.000000,1.000000,0.318432,1.0
32126,0.884615,0.142857,0.487179,0.666667,0.25,0.394737,0.500000,0.333333,0.000000,1.00,1.000000,1.000000,0.221643,1.0
32127,0.615385,0.000000,0.440171,1.000000,0.25,0.263158,0.166667,0.611111,0.222222,0.30,0.444444,0.250000,0.252460,1.0
32128,0.923077,0.285714,0.401709,0.666667,0.00,0.394737,0.277778,0.000000,0.333333,0.50,0.527778,0.455882,0.184956,0.0


In [7]:
# Load the cached NumPy copy of the table. The only code that writes this file is a
# commented-out `np.save` in an earlier cell, so on a fresh checkout the cache may not
# exist; in that case rebuild it from the DataFrame `DIY` so Restart & Run All works.
try:
	data = np.load('./data/DIY.npy')
except FileNotFoundError:
	data = np.asarray(DIY)
	np.save('./data/DIY.npy', data)

In [9]:
def cross_validation(data, k):
	"""Split a 2-D array into ``k`` equally sized, contiguous folds.

	Args:
		data: array of shape (n_rows, n_cols).
		k: number of folds; must satisfy 1 <= k <= n_rows.

	Returns:
		Array of shape (k, n_rows // k, n_cols). Fold ``i`` holds rows
		``i * (n_rows // k)`` up to ``(i + 1) * (n_rows // k)`` of ``data``.

	Raises:
		ValueError: if ``k`` is smaller than 1 or larger than the number of rows.

	Note:
		When ``n_rows`` is not a multiple of ``k`` the trailing
		``n_rows % k`` rows are dropped so that all folds have the same size
		(a plain reshape would raise in that case). Rows are not shuffled.
	"""
	data = np.asarray(data)
	n_rows = data.shape[0]
	if k < 1 or k > n_rows:
		raise ValueError('k must be between 1 and the number of rows (%d), got %r' % (n_rows, k))
	fold_size = n_rows // k
	# Trim the remainder so the row count is an exact multiple of k.
	usable = data[:fold_size * k]
	return usable.reshape(k, fold_size, data.shape[1])
def split_train_test(data_part, k):
	"""Pick fold ``k`` as the test set and merge the other folds into the train set.

	Args:
		data_part: 3-D array indexed as (fold, row, column).
		k: index of the fold to hold out.

	Returns:
		(train, test): ``train`` is the remaining folds stacked along the row
		axis, ``test`` is fold ``k`` unchanged.
	"""
	held_out = data_part[k, :, :]
	# Boolean mask selecting every fold except the held-out one.
	keep = np.ones(data_part.shape[0], dtype=bool)
	keep[k] = False
	remaining_folds = list(data_part[keep])
	merged = np.concatenate(remaining_folds, axis=0)
	return merged, held_out

In [10]:
class MLP(object):
	"""Single-layer sigmoid classifier trained with full-batch gradient descent.

	Despite the name there is no hidden layer: the model is
	``y_hat = sigmoid(x @ w + b)`` trained on the binary cross-entropy loss.

	The last column of ``train`` / ``test`` is taken as the label, all other
	columns are the input features.
	"""

	def __init__(self, train, test, n_in, n_out, lr, epoch, seed=None):
		"""Store hyper-parameters, split features/labels and initialise weights.

		Args:
			train: 2-D array; last column is the label.
			test: 2-D array with the same column layout as ``train``.
			n_in: number of input features.
			n_out: number of output units.
			lr: learning rate of the gradient step.
			epoch: number of full-batch updates performed by ``train_model``.
			seed: optional seed for the weight initialisation. ``None`` (default)
				draws from NumPy's global random state, as before.
		"""
		self.n_in = n_in
		self.n_out = n_out
		self.lr = lr
		self.epoch = epoch
		self.train_data = train[:, :-1]
		self.train_label = train[:, -1].reshape(train.shape[0], 1)
		self.test_data = test[:, :-1]
		self.test_label = test[:, -1].reshape(test.shape[0], 1)
		if seed is None:
			self.w = np.random.randn(self.n_in, self.n_out)
			self.b = np.random.randn(self.n_out)
		else:
			# A dedicated generator makes the run reproducible without touching global state.
			rng = np.random.default_rng(seed)
			self.w = rng.standard_normal((self.n_in, self.n_out))
			self.b = rng.standard_normal(self.n_out)

	def sigmoid(self, x):
		"""Logistic function 1 / (1 + exp(-x)), evaluated without overflow."""
		x = np.asarray(x)
		# exp is only ever taken of a non-positive number, so it cannot overflow.
		z = np.exp(-np.abs(x))
		return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

	def sigmoid_derivative(self, x):
		"""Derivative of the logistic function at ``x``: s(x) * (1 - s(x))."""
		s = self.sigmoid(x)
		return s * (1 - s)

	def forward(self, x): # (bsz, n_in)
		"""Return the pre-activation ``o`` and the prediction ``y_hat``, both (bsz, n_out)."""
		o = np.dot(x, self.w) + self.b # (bsz, n_out)
		y_hat = self.sigmoid(o) # (bsz, n_out)
		return o, y_hat

	def loss(self, y, y_hat): # (bsz, n_out)
		"""Binary cross-entropy summed over all elements (returns a scalar).

		``y_hat`` is clipped away from 0 and 1 so a saturated prediction gives
		a large finite loss instead of inf / NaN.
		"""
		eps = 1e-12
		p = np.clip(y_hat, eps, 1 - eps)
		return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

	def judge(self, y_hat, y):
		"""Element-wise agreement score: positive when ``y_hat`` and ``y`` lie on the same side of 0.5."""
		s = (y_hat - 0.5) * (y - 0.5)
		return s

	def backward(self, x, y): # (bsz, n_in) , (bsz, n_out)
		"""Perform one gradient-descent step on the batch ``(x, y)``.

		The gradient is that of the cross-entropy averaged over the batch.
		For a sigmoid output, dL/do simplifies to ``y_hat - y``; using it
		directly avoids the division by ``y_hat`` / ``1 - y_hat`` that turns
		into NaN when the sigmoid saturates.
		"""
		bsz = x.shape[0]
		_, y_hat = self.forward(x) # (bsz, n_out)
		# Align label shape with the prediction so (bsz,) labels do not broadcast to (bsz, bsz).
		y = np.asarray(y).reshape(y_hat.shape)
		delta = y_hat - y # dL/do, (bsz, n_out)
		L_d_w = np.dot(x.T, delta) / bsz # (n_in, n_out)
		L_d_b = np.mean(delta, axis=0) # (n_out,)
		self.w = self.w - self.lr * L_d_w
		self.b = self.b - self.lr * L_d_b

	def test_model(self, data, label):
		"""Return the fraction of rows in ``data`` classified on the correct side of 0.5."""
		_, y_hat = self.forward(data)
		label = np.asarray(label).reshape(y_hat.shape)
		total = data.shape[0]
		correct = np.sum(self.judge(y_hat, label) > 0)
		accs = correct / total
		return accs

	def train_model(self, log_every=1):
		"""Run ``self.epoch`` full-batch updates, reporting test accuracy.

		Args:
			log_every: print the accuracy every ``log_every`` epochs
				(default 1 = every epoch, as before); 0 or None disables printing.
		"""
		for i in range(self.epoch):
			self.backward(self.train_data, self.train_label)
			accs = self.test_model(self.test_data, self.test_label)
			if log_every and i % log_every == 0:
				print('epoch: %d, accs: %f' % (i, accs))


In [12]:
# Partition the data array into 5 folds.
data_part = cross_validation(data, k=5)

In [13]:
# Hold out fold 0 as the test set; the remaining folds form the training set.
train, test = split_train_test(data_part, k=0)

In [16]:
# Every column except the last is an input feature (the last one is the label).
model = MLP(
	train,
	test,
	n_in=train.shape[1] - 1,
	n_out=1,
	lr=0.5,
	epoch=2000,
)