# Stabilirea sentimentului folosind Azure

In [9]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

import os

# Credentials are read from the environment so that no secret is stored in the
# notebook. Set LANGUAGE_KEY (and optionally LANGUAGE_ENDPOINT) before starting
# the kernel.
# NOTE(review): a key was previously committed on this line; treat it as leaked
# and rotate it in the Azure portal.
# `key` is left empty when the variable is unset; calls to the service are then
# expected to be rejected at authentication time.
key = os.environ.get("LANGUAGE_KEY", "")
endpoint = os.environ.get("LANGUAGE_ENDPOINT", "https://georgianaa.cognitiveservices.azure.com/")

def authenticate():
    """Return a TextAnalyticsClient for the module-level ``endpoint``.

    The client authenticates with an AzureKeyCredential built from the
    module-level ``key``.
    """
    return TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

def azure_sentiment_analysis(text):
    """Print the Azure sentiment label and confidence scores for one text.

    Args:
        text: The document to analyse; it is sent as a single-element batch.

    Raises:
        RuntimeError: If the service returns an error entry for the document
            instead of a sentiment result.
    """
    client = authenticate()
    response = client.analyze_sentiment(documents=[text])[0]

    # Per-document failures come back as an error entry rather than an
    # exception; surface them explicitly instead of failing on `.sentiment`.
    if response.is_error:
        raise RuntimeError(
            f"Azure sentiment analysis failed: {response.error.code} - {response.error.message}"
        )

    print(f"\nAzure Sentiment: {response.sentiment}")
    scores = response.confidence_scores
    # Read the three documented score attributes by name instead of walking the
    # object's private __dict__; this also avoids shadowing the global `key`.
    for label in ("positive", "neutral", "negative"):
        print(f" {label.capitalize()}: {getattr(scores, label):.2f}")

In [22]:
# Try the Azure helper on a sample message (reused later for the ANN predictions)
msg = (
    "By choosing a bike over a car, I’m reducing my environmental footprint. "
    "Cycling promotes eco-friendly transportation, and I’m proud to be part of that movement."
)
azure_sentiment_analysis(msg)



Azure Sentiment: positive
 Positive: 0.87
 Neutral: 0.13
 Negative: 0.00


# Extragerea caracteristicilor BoW / TF-IDF / Word2Vec

In [11]:
import pandas as pd
import re
import string
import numpy as np
import nltk
# Fetch the NLTK resources used below: tokenizer models, stop-word list, WordNet.
# 'punkt_tab' is included because recent NLTK releases load it (instead of the
# pickled 'punkt' models) inside word_tokenize; 'punkt' is kept for older versions.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from gensim.models import Word2Vec

# Text preprocessing resources shared by clean_text
lemmatizer = WordNetLemmatizer()
stop_words = {*stopwords.words('english')}

# Patterns are compiled once because clean_text is applied to every row.
_URL_RE = re.compile(r"http\S+|www\S+")   # "https…" is already covered by "http\S+"
_DIGITS_RE = re.compile(r"\d+")
# Anything that is neither a word character nor whitespace, plus "_" (which \w
# would otherwise keep). Unlike string.punctuation this also removes non-ASCII
# punctuation such as the typographic apostrophe in "I’m".
_PUNCT_RE = re.compile(r"[^\w\s]|_")


def clean_text(text):
    """Normalise a raw text into a space-joined string of lemmatised tokens.

    Steps, in order: lower-case, remove URLs, remove digit runs, remove
    punctuation/symbols (ASCII and non-ASCII), tokenise with NLTK, drop tokens
    found in ``stop_words`` and lemmatise the rest with ``lemmatizer``.

    Args:
        text: The input string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    text = text.lower()
    text = _URL_RE.sub('', text)
    text = _DIGITS_RE.sub('', text)
    text = _PUNCT_RE.sub('', text)
    tokens = nltk.word_tokenize(text)
    return ' '.join(lemmatizer.lemmatize(tok) for tok in tokens if tok not in stop_words)


[nltk_data] Downloading package punkt to C:\Users\Personal
[nltk_data]     PC\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Personal
[nltk_data]     PC\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Personal
[nltk_data]     PC\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [12]:
# Load the dataset, drop rows with any missing value, and add the cleaned text column
df = (
    pd.read_csv("data/reviews_mixed.csv")
    .dropna()
    .assign(cleaned=lambda frame: frame['Text'].apply(clean_text))
)


In [13]:
# Bag-of-words counts, vocabulary capped at 1000 terms
vectorizer_bow = CountVectorizer(max_features=1000)
X_bow = vectorizer_bow.fit_transform(df['cleaned']).toarray()

# TF-IDF weights, same vocabulary cap
vectorizer_tfidf = TfidfVectorizer(max_features=1000)
X_tfidf = vectorizer_tfidf.fit_transform(df['cleaned']).toarray()

# Word2Vec: train on the tokenised corpus, then average word vectors per document
sentences = [doc.split() for doc in df['cleaned']]
w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)


def _mean_vector(tokens):
    """Average the Word2Vec vectors of ``tokens``; a zero vector if none are known."""
    known = [w2v_model.wv[tok] for tok in tokens if tok in w2v_model.wv]
    if not known:
        known = [np.zeros(100)]
    return np.mean(known, axis=0)


X_w2v = np.array([_mean_vector(tokens) for tokens in sentences])


# Alte caracteristici

In [14]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Small hand-picked sentiment lexicons used for the count features
positive_words = {'amazing', 'excellent', 'good', 'great', 'happy', 'joy', 'love'}
negative_words = {'awful', 'bad', 'hate', 'sad', 'terrible', 'worst'}


def extra_features(text):
    """Compute four hand-crafted features for a whitespace-tokenised text.

    Returns:
        ``[char_count, word_count, pos_count, neg_count]`` where the last two
        count tokens present in ``positive_words`` / ``negative_words``.
    """
    tokens = text.split()
    pos_hits = len([tok for tok in tokens if tok in positive_words])
    neg_hits = len([tok for tok in tokens if tok in negative_words])
    return [len(text), len(tokens), pos_hits, neg_hits]

# One row of hand-crafted features per cleaned document, then standardised
X_extra = np.array(df['cleaned'].map(extra_features).tolist())
scaler = StandardScaler()
X_extra_scaled = scaler.fit(X_extra).transform(X_extra)

# Final feature matrix: TF-IDF columns followed by the scaled extra features
X_combined = np.concatenate([X_tfidf, X_extra_scaled], axis=1)


# Clasificator ANN (tool – Keras) + predicție mesaj

In [15]:
from sklearn.preprocessing import LabelEncoder
# Import from the public Keras modules; `keras.src` is the package's internal
# implementation path.
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Encode the sentiment labels as integers, then one-hot encode them for the
# softmax / categorical cross-entropy model.
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(df["Sentiment"])
y = to_categorical(y_labels)

# 80/20 train/test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X_combined, y, test_size=0.2, random_state=42)


In [23]:
# Keras feed-forward classifier
from keras.layers import Input

# The input shape is declared with an explicit Input layer; passing
# `input_shape` to the first Dense layer of a Sequential model makes Keras 3
# emit a UserWarning.
model = Sequential([
    Input(shape=(X_combined.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(y_train.shape[1], activation='softmax')  # one unit per sentiment class
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Keep the History object for later inspection instead of echoing its repr.
history = model.fit(X_train, y_train, epochs=20, batch_size=20, validation_split=0.1)


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - accuracy: 0.3989 - loss: 0.7197 - val_accuracy: 0.4118 - val_loss: 0.6976
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6661 - loss: 0.6873 - val_accuracy: 0.6471 - val_loss: 0.6727
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7425 - loss: 0.6445 - val_accuracy: 0.7059 - val_loss: 0.6499
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7304 - loss: 0.6233 - val_accuracy: 0.7059 - val_loss: 0.6300
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6574 - loss: 0.6230 - val_accuracy: 0.7059 - val_loss: 0.6120
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7059 - loss: 0.5902 - val_accuracy: 0.7059 - val_loss: 0.5938
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x19a3d1b3110>

In [24]:
# Score the Keras model on the held-out split, then classify the sample message
loss, acc = model.evaluate(X_test, y_test)
print(f"\nAccuracy (Keras ANN): {acc:.2f}")

# Build the message's feature row with the already-fitted vectoriser and scaler
msg_cleaned = clean_text(msg)
msg_extra = scaler.transform([extra_features(msg_cleaned)])
msg_vec = vectorizer_tfidf.transform([msg_cleaned]).toarray()
msg_input = np.concatenate([msg_vec, msg_extra], axis=1)

pred = model.predict(msg_input)
pred_label = label_encoder.inverse_transform(np.argmax(pred, axis=1))
print(f"\nPredicted Sentiment (Keras): {pred_label[0]}")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7946 - loss: 0.5993

Accuracy (Keras ANN): 0.79
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

Predicted Sentiment (Keras): positive


# Clasificator ANN (manual – fără tool) + predicție mesaj

In [25]:
# Hand-written ANN (NumPy only)
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated in an overflow-safe form: ``exp`` is only ever applied to
    non-positive values, so inputs of large magnitude yield 0.0 or 1.0
    without an overflow warning.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        A float ndarray with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # always within [0, 1]
    return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def sigmoid_derivative(x):
    """Derivative of ``sigmoid`` at pre-activation ``x``: ``s * (1 - s)``."""
    s = sigmoid(x)
    return s * (1 - s)


class ANN:
    """Network with one sigmoid hidden layer and a sigmoid output layer.

    Training is plain gradient descent on the whole batch passed to ``train``.
    """

    def __init__(self, input_size, hidden_size, output_size, lr=0.001, seed=None):
        """Create the weight matrices and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            lr: Learning rate used by ``backward``.
            seed: Optional seed for reproducible weights. When ``None`` the
                weights are drawn from NumPy's global random state, as before.
        """
        self.lr = lr
        rng = np.random if seed is None else np.random.default_rng(seed)
        self.W1 = rng.standard_normal((input_size, hidden_size))
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.standard_normal((hidden_size, output_size))
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and cache the intermediate values for ``backward``.

        Args:
            X: Array of shape (n_samples, input_size).

        Returns:
            Output activations of shape (n_samples, output_size).
        """
        self.z1 = X @ self.W1 + self.b1
        self.a1 = sigmoid(self.z1)
        self.z2 = self.a1 @ self.W2 + self.b2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output):
        """Apply one gradient-descent update from the last ``forward`` pass.

        Args:
            X: The inputs that were given to ``forward``.
            y: Target array with the same shape as ``output``.
            output: The activations returned by ``forward(X)``.
        """
        m = y.shape[0]
        # `output - y` is the gradient w.r.t. z2 of binary cross-entropy through
        # a sigmoid output; the MSE printed by train() is a progress indicator only.
        d_z2 = output - y
        d_W2 = self.a1.T @ d_z2 / m
        d_b2 = np.sum(d_z2, axis=0, keepdims=True) / m
        # sigmoid'(z1) == a1 * (1 - a1); reuse the cached activation.
        d_z1 = (d_z2 @ self.W2.T) * (self.a1 * (1 - self.a1))
        d_W1 = X.T @ d_z1 / m
        d_b1 = np.sum(d_z1, axis=0, keepdims=True) / m

        self.W1 -= self.lr * d_W1
        self.b1 -= self.lr * d_b1
        self.W2 -= self.lr * d_W2
        self.b2 -= self.lr * d_b2

    def train(self, X, y, epochs=100):
        """Run ``epochs`` full-batch updates, printing the MSE every 10th epoch.

        The printed loss is computed from the outputs obtained before that
        epoch's weight update.
        """
        for i in range(epochs):
            out = self.forward(X)
            self.backward(X, y, out)
            if i % 10 == 0:
                loss = np.mean((y - out) ** 2)
                print(f"Epoch {i} - Loss: {loss:.4f}")


In [26]:
# Train the hand-written network
# NOTE(review): the slice is taken from the unsplit feature matrix, so it may
# include rows that also belong to X_test — confirm this is intended.
X_small = X_combined[:209]
y_small = y[:209]
# Size the output layer from the one-hot label matrix instead of hard-coding 2,
# so the network matches the targets for any number of sentiment classes.
ann = ANN(input_size=X_small.shape[1], hidden_size=32, output_size=y_small.shape[1])
ann.train(X_small, y_small, epochs=20)


Epoch 0 - Loss: 0.3670
Epoch 10 - Loss: 0.3659


In [27]:
# Classify the sample message with the hand-written network
manual_pred = ann.forward(msg_input)
manual_label = np.argmax(manual_pred)
# Decode through the label encoder in the two-class case; otherwise fall back
# to a generic class name.
label_name = (
    label_encoder.inverse_transform([manual_label])[0]
    if y.shape[1] == 2
    else f"Clasa {manual_label}"
)

print(f"\nPredicted Sentiment (Manual ANN): {label_name}")



Predicted Sentiment (Manual ANN): positive
