In [5]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

In [6]:
# ==============================================
# Pré-processamento com Pandas/Scikit-learn
# ==============================================

# Carregar e combinar datasets
#inputs_df = pd.read_csv('Dataset6_clean_input.csv')
#outputs_df = pd.read_csv('Dataset6_clean_output.csv')
#df = pd.merge(inputs_df, outputs_df, on='ID')
df = pd.read_csv('../Tarefa_1/Dataset6_clean.csv' , sep=',')
# Vetorização do texto
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X = vectorizer.fit_transform(df['Text']).toarray()
y = df['Label'].map({'AI':1, 'Human':0}).values

# Divisão treino-teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
    )

print(X_train)
print(X_test)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [7]:
# ==============================================
# Modelos Implementados com Numpy
# ==============================================

class LogisticRegression:
    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear)
            
            dw = (1/n_samples) * np.dot(X.T, (y_pred - y))
            db = (1/n_samples) * np.sum(y_pred - y)
            
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear = np.dot(X, self.weights) + self.bias
        y_pred = self._sigmoid(linear)
        return (y_pred > 0.5).astype(int)

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, lr=0.01):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        self.lr = lr

    def _relu(self, Z):
        return np.maximum(0, Z)

    def _relu_deriv(self, Z):
        return Z > 0

    def _sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self._relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        return self._sigmoid(self.z2)

    def backward(self, X, y, output):
        m = y.shape[0]
        
        dz2 = output - y.reshape(-1,1)
        dW2 = (1/m) * np.dot(self.a1.T, dz2)
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)
        
        dz1 = np.dot(dz2, self.W2.T) * self._relu_deriv(self.z1)
        dW1 = (1/m) * np.dot(X.T, dz1)
        db1 = (1/m) * np.sum(dz1, axis=0)

        return dW1, db1, dW2, db2

    def train(self, X, y, epochs=1000):
        for epoch in range(epochs):
            output = self.forward(X)
            dW1, db1, dW2, db2 = self.backward(X, y, output)
            
            self.W1 -= self.lr * dW1
            self.b1 -= self.lr * db1
            self.W2 -= self.lr * dW2
            self.b2 -= self.lr * db2

    def predict(self, X):
        return (self.forward(X) > 0.5).astype(int)

In [8]:
# ==============================================
# Treino e Avaliação
# ==============================================

# Treinar Regressão Logística
print("Training Logistic Regression...")
lr = LogisticRegression(lr=0.1, n_iters=1000)
lr.fit(X_train, y_train)
lr_acc = np.mean(lr.predict(X_test) == y_test)
print(f"LR Test Accuracy: {lr_acc:.2f}")

# Treinar Rede Neural
print("\nTraining Neural Network...")
nn = NeuralNetwork(input_size=X_train.shape[1], hidden_size=64, 
                   output_size=1, lr=0.01)
nn.train(X_train, y_train, epochs=1000)
nn_acc = np.mean(nn.predict(X_test) == y_test)
print(f"NN Test Accuracy: {nn_acc:.2f}")

Training Logistic Regression...
LR Test Accuracy: 0.96

Training Neural Network...
NN Test Accuracy: 0.52
