# Sarcasm detection

## ideas:
- автоматичне виявлення сарказму в соцмережах (Twitter тощо)
- генерація саркастичних висловлювань
- аналіз впливу емодзі та пунктуації
- порівняння сарказму в різних культурах
- сарказм у політичних текстах
- сарказм у мультимодальному контексті (текст, зображення, голос)
- сарказм у новинах та заголовках
- 
- 

In [1]:
# imports section
import numpy as np
import math
from numpy import dot
from numpy.linalg import norm
from gensim import corpora, models, similarities
import string

import gensim.downloader as api
from gensim.models import Word2Vec

## Assign mood scores

In [4]:
# Hand-assigned seed mood scores: 0.0 is the most negative mood, 0.9 the most
# positive value used here. Insertion order is kept as originally written
# because the training cells iterate over this dict.
lexicon = {
    "anxious": 0.1,
    "furious": 0.0,
    "peaceful": 0.9,
    "hate": 0.0,
    "joyful": 0.9,
    "unacceptable": 0.1,
    "thrilled": 0.9,
    "infuriating": 0.1,
    "irate": 0.1,
    "terrible": 0.1,
    "nervous": 0.2,
    "melancholy": 0.3,
    "depressed": 0.1,
    "gloomy": 0.1,
    "serene": 0.8,
    "elated": 0.8,
    "ecstatic": 0.9,
    "overjoyed": 0.9,
    "gleeful": 0.7,
    "cheerful": 0.7,
    "optimistic": 0.7,
    "buoyant": 0.6,
    "enthusiastic": 0.7,
    "upbeat": 0.6,
    "festive": 0.6,
    "playful": 0.6,
    "vivacious": 0.6,
    "amused": 0.7,
    "blissful": 0.6,
    "grateful": 0.8,
    "tranquil": 0.8,
    "relaxed": 0.7,
    "comfortable": 0.6,
    "cozy": 0.6,
    "warm": 0.6,
    "inviting": 0.6,
    "satisfied": 0.5,
    "pleasant": 0.6,
    "pleased": 0.7,
    "happy": 0.8,
}

## Read adjectives from a txt file and build a mapping of adjectives to their word vectors

In [5]:
# Load the pretrained model published under the name 'glove-wiki-gigaword-100'
# through gensim's downloader API. Despite its name, `corpus` is used below as
# a word -> vector lookup (`word in corpus`, `corpus[word]`).
corpus = api.load('glove-wiki-gigaword-100')

# Sanity check: fetch the embedding of a single word and print it.
vector = corpus['computer']
print(vector)

[-1.6298e-01  3.0141e-01  5.7978e-01  6.6548e-02  4.5835e-01 -1.5329e-01
  4.3258e-01 -8.9215e-01  5.7747e-01  3.6375e-01  5.6524e-01 -5.6281e-01
  3.5659e-01 -3.6096e-01 -9.9662e-02  5.2753e-01  3.8839e-01  9.6185e-01
  1.8841e-01  3.0741e-01 -8.7842e-01 -3.2442e-01  1.1202e+00  7.5126e-02
  4.2661e-01 -6.0651e-01 -1.3893e-01  4.7862e-02 -4.5158e-01  9.3723e-02
  1.7463e-01  1.0962e+00 -1.0044e+00  6.3889e-02  3.8002e-01  2.1109e-01
 -6.6247e-01 -4.0736e-01  8.9442e-01 -6.0974e-01 -1.8577e-01 -1.9913e-01
 -6.9226e-01 -3.1806e-01 -7.8565e-01  2.3831e-01  1.2992e-01  8.7721e-02
  4.3205e-01 -2.2662e-01  3.1549e-01 -3.1748e-01 -2.4632e-03  1.6615e-01
  4.2358e-01 -1.8087e+00 -3.6699e-01  2.3949e-01  2.5458e+00  3.6111e-01
  3.9486e-02  4.8607e-01 -3.6974e-01  5.7282e-02 -4.9317e-01  2.2765e-01
  7.9966e-01  2.1428e-01  6.9811e-01  1.1262e+00 -1.3526e-01  7.1972e-01
 -9.9605e-04 -2.6842e-01 -8.3038e-01  2.1780e-01  3.4355e-01  3.7731e-01
 -4.0251e-01  3.3124e-01  1.2576e+00 -2.7196e-01 -8

In [6]:
# Load the adjective vocabulary. Newlines are turned into commas so that both
# one-word-per-line and comma-separated layouts split the same way.
# (An earlier variant of this cell read '../sets/adjectives.txt' instead.)
with open('../sets/english-adjectives.txt', 'r', encoding='utf-8') as file:
    data = file.read().replace('\n', ',')

# Strip surrounding whitespace and drop empty entries: a list written as
# "a, b" or ending in a newline would otherwise yield tokens such as " b" or
# "" that can never match the embedding vocabulary.
adjectives = [entry.strip() for entry in data.split(',') if entry.strip()]

# Map every adjective known to the embedding model to its word vector;
# adjectives missing from the model's vocabulary are left out.
adjectives_scored = {
    adjective: corpus[adjective]
    for adjective in adjectives
    if adjective in corpus
}


## Extrapolate mood scores to adjectives using ML

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

lexicon_trained = {}

# Regressor: standardise the embedding features, then fit an RBF-kernel SVR.
# MultiOutputRegressor expects a 2-D target, which is why each score is paired
# with a constant placeholder column below.
model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

# Take seed vectors straight from the embedding model. Going through
# `adjectives_scored` raised KeyError for any seed word that is not listed in
# the adjective file, even when the embedding model knows the word.
seed_words = [word for word in lexicon if word in corpus]
missing_words = [word for word in lexicon if word not in corpus]
if missing_words:
    print(f"Skipping seed words without an embedding: {missing_words}")

x_train = [corpus[word] for word in seed_words]
# Only the first target column carries information; the second is the
# constant placeholder and is never read back.
y_train = [[lexicon[word], 0] for word in seed_words]

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train_np, y_train_np)

# Score every adjective in one batched call instead of one predict() per word.
if adjectives_scored:
    words = list(adjectives_scored)
    predictions = model.predict(np.array([adjectives_scored[word] for word in words]))
    lexicon_trained = dict(zip(words, predictions[:, 0]))

print(lexicon_trained)

## Basic NLP techniques to extract mood score changes for text

In [8]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk

def calculate_mood_series(text, scores=None):
    """Return the mood scores of the scored words in *text*, in reading order.

    Args:
        text: Raw text to analyse.
        scores: Optional mapping of lowercase word -> mood score. Defaults to
            the notebook-level ``lexicon_trained``.

    Returns:
        A list with one score per token that could be matched; tokens without
        a match are skipped, so the list may be empty.
    """
    if scores is None:
        scores = lexicon_trained

    lemmatizer = WordNetLemmatizer()
    mood_series = []
    for token in word_tokenize(text):
        # The scored words are lowercase, so a capitalised token (for example
        # at the start of a sentence) would never match without this.
        word = token.lower()
        # Try the surface form first, then the adjective lemma (the lexicon is
        # built from an adjective list), then the default noun lemma that the
        # original implementation used.
        candidates = (
            word,
            lemmatizer.lemmatize(word, pos="a"),
            lemmatizer.lemmatize(word),
        )
        for candidate in candidates:
            if candidate in scores:
                mood_series.append(scores[candidate])
                break
    return mood_series

In [9]:
# Sample passage used to try out the mood-series extraction.
example_text = (
    "It was the best of times, it was the worst of times, "
    "it was the age of wisdom, it was the age of foolishness, "
    "it was the epoch of belief, it was the epoch of incredulity, "
    "it was the season of light, it was the season of darkness, "
    "it was the spring of hope, it was the winter of despair."
)

print(calculate_mood_series(example_text))

[0.45973176454385156, 0.3325276673498717, 0.42756880944163317]


Maybe try classification instead of regression?
Classify words into positive and negative moods.

## Classification

In [None]:
# Binary seed labels for the classifier: 0 = negative mood, 1 = positive mood.
# Insertion order is kept because the training cell iterates over this dict.
lexicon = {
    "anxious": 0,
    "furious": 0,
    "peaceful": 1,
    "hate": 0,
    "joyful": 1,
    "unacceptable": 0,
    "thrilled": 1,
    "infuriating": 0,
    "irate": 0,
    "terrible": 0,
    "nervous": 0,
    "melancholy": 0,
    "useless": 0,
    # Was mislabelled as 1; the regression lexicon scores this word 0.1.
    "depressed": 0,
    "gloomy": 0,
    "serene": 1,
    "elated": 1,
    "ecstatic": 1,
    "overjoyed": 1,
    "gleeful": 1,
    "cheerful": 1,
    "optimistic": 1,
    "buoyant": 1,
    "enthusiastic": 1,
    "upbeat": 1,
    "festive": 1,
    "playful": 1,
    "vivacious": 1,
    "amused": 1,
    "blissful": 1,
    "grateful": 1,
    "tranquil": 1,
    "relaxed": 1,
    "comfortable": 1,
    "cozy": 1,
    "warm": 1,
    "inviting": 1,
    "satisfied": 1,
    "pleasant": 1,
    "pleased": 1,
    "happy": 1,
}

In [23]:
# Load the adjective vocabulary. Newlines are turned into commas so that both
# one-word-per-line and comma-separated layouts split the same way.
with open('../sets/english-adjectives.txt', 'r', encoding='utf-8') as file:
    data = file.read().replace('\n', ',')

# Strip surrounding whitespace and drop empty entries: a list written as
# "a, b" or ending in a newline would otherwise yield tokens such as " b" or
# "" that can never match the embedding vocabulary.
adjectives = [entry.strip() for entry in data.split(',') if entry.strip()]

# Map every adjective known to the embedding model to its word vector;
# adjectives missing from the model's vocabulary are left out.
adjectives_scored = {
    adjective: corpus[adjective]
    for adjective in adjectives
    if adjective in corpus
}


In [28]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

lexicon_trained = {}

# Binary mood classifier: an RBF-kernel support vector classifier trained on
# the labelled seed words.
# NOTE(review): the seed labels are skewed towards the positive class and the
# recorded output is almost entirely 1 - consider balancing the seeds or the
# class weights.
model = SVC(kernel='rbf', C=1.0, gamma='scale')

# Take seed vectors straight from the embedding model. Going through
# `adjectives_scored` raised KeyError for any seed word that is not listed in
# the adjective file, even when the embedding model knows the word.
seed_words = [word for word in lexicon if word in corpus]
missing_words = [word for word in lexicon if word not in corpus]
if missing_words:
    print(f"Skipping seed words without an embedding: {missing_words}")

x_train = [corpus[word] for word in seed_words]
y_train = [lexicon[word] for word in seed_words]

# Labels are built from the same filtered word list as the features so the
# two stay aligned even when a seed word has to be skipped.
x_train_prepared = np.array(x_train)
y_train_prepared = np.array(y_train)

# Train the model
model.fit(x_train_prepared, y_train_prepared)

# Classify every adjective in one batched call instead of one predict() per word.
if adjectives_scored:
    words = list(adjectives_scored)
    predictions = model.predict(np.array([adjectives_scored[word] for word in words]))
    lexicon_trained = dict(zip(words, predictions))

print(lexicon_trained)

{'abandoned': 1, 'able': 1, 'absolute': 1, 'adorable': 1, 'adventurous': 1, 'academic': 1, 'acceptable': 1, 'acclaimed': 1, 'accomplished': 1, 'accurate': 1, 'aching': 1, 'acidic': 1, 'acrobatic': 1, 'active': 1, 'actual': 1, 'adept': 1, 'admirable': 1, 'admired': 1, 'adolescent': 1, 'adored': 1, 'advanced': 1, 'afraid': 0, 'affectionate': 1, 'aged': 1, 'aggravating': 1, 'aggressive': 1, 'agile': 1, 'agitated': 1, 'agonizing': 1, 'agreeable': 1, 'ajar': 1, 'alarmed': 0, 'alarming': 0, 'alert': 1, 'alienated': 1, 'alive': 1, 'all': 1, 'altruistic': 1, 'amazing': 1, 'ambitious': 1, 'ample': 1, 'amused': 1, 'amusing': 1, 'anchored': 1, 'ancient': 1, 'angelic': 1, 'angry': 0, 'anguished': 1, 'animated': 1, 'annual': 1, 'another': 1, 'antique': 1, 'anxious': 0, 'any': 1, 'apprehensive': 1, 'appropriate': 1, 'apt': 1, 'arctic': 1, 'arid': 1, 'aromatic': 1, 'artistic': 1, 'ashamed': 1, 'assured': 1, 'astonishing': 1, 'athletic': 1, 'attached': 1, 'attentive': 1, 'attractive': 1, 'austere': 1,

## Analyse timeseries

## Let's try emojis/punctuation

In [16]:
import emoji

# def extract_emojis(s):
#   return ''.join(c for c in s if c in emoji.distinct_emoji_list('en'))

example_text = "Hey there! 👋 How's your day going? 🌞 Whether you're sipping coffee ☕, exploring new places 🌍, or just chilling at home �, I hope it's amazing! 🎉 Don't forget to smile 😄 and spread positivity! ✨ Life's a journey 🚀, so enjoy every moment! 🌈✨"


def _escaped_shortcode(symbol):
    """Return the demojized name of *symbol*, passed through unicode_escape."""
    name = emoji.demojize(symbol)
    return name.encode('unicode_escape').decode('utf-8')


# Distinct emojis found in the sample text.
emoji_list = emoji.distinct_emoji_list(example_text)
print(emoji_list)

# Despite the variable name, these are the textual emoji names produced by
# demojize (see the printed output), not code points.
unicode_values = list(map(_escaped_shortcode, emoji_list))
print(unicode_values)

['😄', '👋', '✨', '🌈', '🌍', '🌞', '🚀', '🎉', '☕']
[':grinning_face_with_smiling_eyes:', ':waving_hand:', ':sparkles:', ':rainbow:', ':globe_showing_Europe-Africa:', ':sun_with_face:', ':rocket:', ':party_popper:', ':hot_beverage:']


Use two markers at the same time: the text mood classification (positive/negative -> function) and emoji + punctuation.