In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.metrics import mean_absolute_error, r2_score, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import seaborn as sns

# Hugging Face 토큰 설정
os.environ["HUGGINGFACE_HUB_TOKEN"] = "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 "

# Define a simple neural network for regression
class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load LLaMA3 model and tokenizer
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HUGGINGFACE_HUB_TOKEN"])
llama_model = AutoModelForCausalLM.from_pretrained(model_id, token=os.environ["HUGGINGFACE_HUB_TOKEN"])

# Freeze all layers of LLaMA3 model
for param in llama_model.parameters():
    param.requires_grad = False

# Define a new regression head
class LLaMA3Regression(nn.Module):
    def __init__(self, llama_model, input_dim, output_dim):
        super(LLaMA3Regression, self).__init__()
        self.llama_model = llama_model
        self.regression_head = SimpleNN(input_dim, output_dim)
    
    def forward(self, input_ids, attention_mask):
        llama_outputs = self.llama_model(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = llama_outputs.last_hidden_state[:, -1, :]  # Use the last hidden state
        return self.regression_head(hidden_states)

# Initialize the new model
input_dim = 4096  # Adjust this value based on LLaMA3 hidden state dimension
output_dim = 1  # Adjust based on your target dimension
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LLaMA3Regression(llama_model, input_dim, output_dim).to(device)

# Prepare data
def prepare_data(df, tokenizer, text_columns, label_column):
    # Convert numeric data to text
    text_data = df[text_columns].astype(str).agg(' '.join, axis=1)
    inputs = tokenizer(text_data.tolist(), return_tensors='pt', padding=True, truncation=True, max_length=512)
    labels = torch.tensor(df[label_column].values, dtype=torch.float32)
    return inputs, labels

# Load and prepare datasets
train_dir = "path_to_train_csv"
test_dir = "path_to_test_csv"

train_df = pd.read_csv(train_dir)
test_df = pd.read_csv(test_dir)

# Assuming 'label' is the column name of the target variable and the rest are features
text_columns = train_df.columns.difference(['label']).tolist()

train_inputs, train_labels = prepare_data(train_df, tokenizer, text_columns, 'label')
test_inputs, test_labels = prepare_data(test_df, tokenizer, text_columns, 'label')

train_dataset = torch.utils.data.TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_labels)
test_dataset = torch.utils.data.TensorDataset(test_inputs['input_ids'], test_inputs['attention_mask'], test_labels)

# 줄인 배치 크기
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False)

# Training settings
criterion = nn.MSELoss()
optimizer = optim.Adam(model.regression_head.parameters(), lr=0.001)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for input_ids, attention_mask, labels in train_loader:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}")

    # 모델 체크포인트 저장
    torch.save(model.state_dict(), f'model_checkpoint_epoch_{epoch + 1}.pth')

# Evaluation
model.eval()
predictions = []
with torch.no_grad():
    for input_ids, attention_mask, _ in test_loader:
        input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
        outputs = model(input_ids, attention_mask)
        predictions.append(outputs.cpu().numpy())
predictions = np.concatenate(predictions)

# Evaluation metrics
mse = np.mean((test_labels.numpy() - predictions.flatten()) ** 2)
mae = mean_absolute_error(test_labels.numpy(), predictions)
r2 = r2_score(test_labels.numpy(), predictions)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R^2 Score: {r2}")

# 이상 탐지
anomaly_threshold = mse + 3 * np.std(predictions - test_labels.numpy().reshape(-1, 1))

anomalies = (np.abs(predictions - test_labels.numpy().reshape(-1, 1)) > anomaly_threshold).astype(int)

# Classification metrics
accuracy = accuracy_score(test_labels.numpy(), anomalies)
f1 = f1_score(test_labels.numpy(), anomalies, average='macro')
cm = confusion_matrix(test_labels.numpy(), anomalies)

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print("Confusion Matrix:")
print(cm)

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Normal', 'Anomaly'], yticklabels=['Normal', 'Anomaly'])
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Plot predictions and anomalies
plt.figure(figsize=(15, 5))
plt.plot(test_labels.numpy(), label='Actual', color='blue')
plt.plot(predictions, label='Predicted', color='green')
plt.scatter(np.where(anomalies == 1), predictions[anomalies == 1], color='red', label='Anomalies')
plt.legend()
plt.show()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 1.96 GiB. GPU 