# Feed Forward Neural Networks

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Feature Transformation
from sklearn.feature_extraction.text import CountVectorizer

# Evaluation
from sklearn.model_selection import train_test_split

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Load the tab-separated, header-less review file into two named columns.
df = pd.read_csv(
    '../data/amazon_cells_labelled.txt',
    sep='\t',
    header=None,
    names=['Sentence', 'Label'],
    on_bad_lines='skip',  # malformed rows are dropped instead of raising
)

# Pull the raw text and its label out of the frame.
sentences = df['Sentence'].values
y = df['Label'].values

# Hold out 25% of the rows; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(sentences, y, test_size=0.25, random_state=1000)
sentences_train, sentences_test, y_train, y_test = split_parts

## Vectorize the Training and Test Sets

In [None]:
# Fit the vectorizer on the training sentences only, so the held-out
# sentences do not take part in the fitting step.
vectorizer = CountVectorizer().fit(sentences_train)

# Apply the same fitted vectorizer to both splits.
X_train, X_test = (
    vectorizer.transform(sentences_train),
    vectorizer.transform(sentences_test),
)
X_train

## Build the Neural Network

In [None]:
input_dim = X_train.shape[1]  # Number of features (columns of X_train)

# Seed the Python, NumPy and backend random generators so that weight
# initialisation and batch shuffling are repeatable on a fresh kernel.
keras.utils.set_random_seed(1000)

# Declare the input explicitly with an Input object. Passing `input_dim` to
# the first Dense layer is a legacy shortcut that newer Keras releases warn
# about; the resulting network is the same.
model = Sequential([
    keras.Input(shape=(input_dim,)),
    layers.Dense(10, activation='relu'),     # hidden layer, 10 units
    layers.Dense(1, activation='sigmoid'),   # single sigmoid output unit
])

# Loss/optimizer/metric choice for a single-output sigmoid classifier.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

| 网络类型 | 连接方式 | 适用场景 |
|---------|---------|---------|
| 前馈网络 | 单向前进 | 一般分类/回归 |
| 循环网络（RNN） | 有循环连接 | 序列数据 |
| 卷积网络（CNN） | 局部连接 | 图像处理 |

文本 → 向量化 → 前馈网络 → 分类结果

In [None]:
# Train for 100 epochs in mini-batches of 10 with progress output disabled.
# The held-out split is supplied as validation data, so the returned history
# also carries the validation loss/accuracy series that are plotted later.
history = model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=False,
)

In [None]:
# Score the fitted model on each split and report accuracy to 4 decimals.
# Each entry pairs the report template with the data it is computed on.
evaluation_runs = (
    ("Training Accuracy: {:.4f}", X_train, y_train),
    ("Testing Accuracy:  {:.4f}", X_test, y_test),
)
for report_template, features, targets in evaluation_runs:
    loss, accuracy = model.evaluate(features, targets, verbose=False)
    print(report_template.format(accuracy))

In [None]:
def plot_history(history):
    """Plot accuracy and loss per epoch, side by side.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` mapping with the keys ``'accuracy'``
        and ``'loss'`` (one value per epoch), such as the object returned by
        ``model.fit``. The keys ``'val_accuracy'`` and ``'val_loss'`` are
        plotted as well when present; they are skipped when training ran
        without validation data.

    Returns
    -------
    None
        The figure is drawn through matplotlib; nothing is returned.

    Raises
    ------
    KeyError
        If ``'accuracy'`` or ``'loss'`` is missing from the mapping.
    """
    metrics = history.history
    acc = metrics['accuracy']
    loss = metrics['loss']
    # Validation series are optional: use .get() so their absence does not
    # abort the whole plot with a KeyError.
    val_acc = metrics.get('val_accuracy')
    val_loss = metrics.get('val_loss')

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    # Explicit figure/axes objects instead of the implicit pyplot state.
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

    ax_acc.plot(epochs, acc, 'b', label='Training Accuracy')
    if val_acc is not None:
        ax_acc.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    ax_acc.set_title('Training and Validation Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    ax_loss.plot(epochs, loss, 'b', label='Training Loss')
    if val_loss is not None:
        ax_loss.plot(epochs, val_loss, 'r', label='Validation Loss')
    ax_loss.set_title('Training and validation loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

plot_history(history)

In [None]:
# Load a second, gzip-compressed CSV. Only the columns at positions 0 and 5
# are read, and they are named 'Label' and 'Sentence' respectively.
# NOTE(review): this rebinds `df`, replacing the frame loaded earlier.
df = pd.read_csv(
    '../data/training.1600000.processed.noemoticon.csv.gzip',
    compression='gzip',   # stated explicitly alongside the '.gzip' suffix
    encoding='latin-1',
    header=None,          # the file has no header row
    usecols=[0, 5],
    names=['Label', 'Sentence'],
    on_bad_lines='skip',  # malformed rows are dropped instead of raising
)

- **损失函数**：`binary_crossentropy` —— 适用于二分类

- **优化器**：`adam` —— 自适应学习率优化算法

- **评估指标**：`accuracy` —— 准确率