# Duygu Analizi (Sentiment-Analysis)

In [1]:
import pandas as pd
import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from sklearn.metrics import confusion_matrix , classification_report

**Amazon** veri seti içerisindeki yorumların <i>pozitif mi negatif mi</i> olduğunu sınıflandırmak için binary classification problemi

In [2]:
nltk.download("vader_lexicon")
nltk.download("stopwords")
nltk.download("punkt")
nltk.download("wordnet")
nltk.download("omw-1.4")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\emirh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\emirh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\emirh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\emirh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\emirh\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

### 1)Veri Setinin Yüklenmesi

In [3]:
df = pd.read_csv("duygu_analizi_amazon_veri_seti.csv")

In [4]:
df.head

<bound method NDFrame.head of                                               reviewText  Positive
0      This is a one of the best apps acording to a b...         1
1      This is a pretty good version of the game for ...         1
2      this is a really cool game. there are a bunch ...         1
3      This is a silly game and can be frustrating, b...         1
4      This is a terrific game on any pad. Hrs of fun...         1
...                                                  ...       ...
19995  this app is fricken stupid.it froze on the kin...         0
19996  Please add me!!!!! I need neighbors! Ginger101...         1
19997  love it!  this game. is awesome. wish it had m...         1
19998  I love love love this app on my side of fashio...         1
19999  This game is a rip off. Here is a list of thin...         0

[20000 rows x 2 columns]>

### 2)Text Cleaning and Preprocessing

In [5]:
lemmatizer = WordNetLemmatizer()
def clean_process_data(text):
    # Tokenize
    tokens = word_tokenize(text.lower())
    
    # Stopword
    filtered_tokens = [token for token in tokens if token not in stopwords.words("english")]
    
    # Lemmatize
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]
    
    # Join Words
    processed_text = " ".join(lemmatized_tokens)

    return processed_text

Tüm metinlere temizleme ve ön işleme adımlarının uygulanması

In [6]:
df["reviewText2"] = df["reviewText"].apply(clean_process_data)

### 3)Sentiment Analysis (NLTK)

In [7]:
analyzer = SentimentIntensityAnalyzer()

def get_sentiments(text):
    score = analyzer.polarity_scores(text)

    sentiment = 1 if score["pos"] > 0 else 0

    return sentiment

Duygu analizi işleminin tüm cümlelere yapılması işlemi

In [8]:
df["sentiment"] = df["reviewText2"].apply(get_sentiments)

### 4)Evaluation - Test

In [9]:
cm = confusion_matrix(df["Positive"] , df["sentiment"])
print(f"Confusion matrix :\n {cm}")

Confusion matrix :
 [[ 1131  3636]
 [  576 14657]]


In [10]:
cr = classification_report(df["Positive"],df["sentiment"])
print(f"Classification Report : \n {cr}")

Classification Report : 
               precision    recall  f1-score   support

           0       0.66      0.24      0.35      4767
           1       0.80      0.96      0.87     15233

    accuracy                           0.79     20000
   macro avg       0.73      0.60      0.61     20000
weighted avg       0.77      0.79      0.75     20000

