# Ajustes iniciales

## Conexión a Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd ./drive/MyDrive/Colab Notebooks/

/content/drive/MyDrive/Colab Notebooks


In [None]:
# Show the current working directory; the relative ./text_models and
# ./trained_models paths used when saving are resolved against it.
%pwd

'/content/drive/MyDrive/Colab Notebooks'

## Instalar microTC

In [None]:
# Install microtc from the felipeRmBr GitHub repository (not from PyPI).
# NOTE(review): the install is not pinned to a tag or commit, so a later run
# may pick up a different revision than the one used for these results.
%pip install git+https://github.com/felipeRmBr/microtc.git#egg=microtc

Collecting microtc
  Cloning https://github.com/felipeRmBr/microtc.git to /tmp/pip-install-63242k11/microtc
  Running command git clone -q https://github.com/felipeRmBr/microtc.git /tmp/pip-install-63242k11/microtc
Building wheels for collected packages: microtc
  Building wheel for microtc (setup.py) ... [?25l[?25hdone
  Created wheel for microtc: filename=microtc-2.2.8-cp37-none-any.whl size=60798 sha256=3d7efb0e339b46d928c0146fec48f92b9fd8a799cbe6c5a8d513880f715866a2
  Stored in directory: /tmp/pip-ephem-wheel-cache-4ch3860s/wheels/73/00/4e/a4e2fc519599a41145f1740a75c80f390d4bcef72fed066f56
Successfully built microtc
Installing collected packages: microtc
Successfully installed microtc-2.2.8


## Importar módulos *_utils


In [None]:
import sys

# Folder that holds the my_utils package. Spelled "MyDrive" to match the
# mount path reported by %cd / %pwd.
UTILS_DIR = '/content/drive/MyDrive/Colab Notebooks/'

# Only register the folder once, so re-running the cell does not keep
# appending duplicate entries to sys.path.
if UTILS_DIR not in sys.path:
  sys.path.append(UTILS_DIR)

from my_utils import dataset_utils
from my_utils import eval_utils
from my_utils import svc_utils  # depends on microtc

In [None]:
import pickle

## Modelos predefinidos

In [None]:
from microtc.textmodel import TextModel

# token_list variants to try; one TextModel is built per entry.
token_list_options = [
    [-3, -1, 3, 7],
    [-3, -2, -1, 3, 6],
    [-1, 3, 7],
    [-3, -2, -1, 3, 7],
    [-2, -1, 3, 7],
]

# Preprocessing options that stay fixed across all variants in this cell.
fixed_tm_options = dict(num_option='group',
                        usr_option='delete',
                        url_option='delete',
                        hashtag_option='delete',
                        emo_option='none',
                        del_punc=True,
                        del_dup=False,
                        del_diac=True)

# Build the (still unfitted) text models, differing only in token_list.
predefined_TM = []
for token_list in token_list_options:
  text_model = TextModel(**fixed_tm_options,
                         token_list=token_list,
                         weighting='tfidf')
  predefined_TM.append(text_model)

In [None]:
from microtc.textmodel import TextModel

# token_list variants to try; one TextModel is built per entry.
token_list_options = [
    [-2, -1, 3],
    [-2, -1, 3, 4],
    [-3, -1, 3, 7],
]

# Preprocessing options that stay fixed across all variants in this cell.
fixed_tm_options = dict(num_option='group',
                        usr_option='group',
                        url_option='group',
                        hashtag_option='group',
                        emo_option='none',
                        del_dup=True,
                        del_diac=True)

# Build the (still unfitted) text models, differing only in token_list.
predefined_TM = []
for token_list in token_list_options:
  text_model = TextModel(**fixed_tm_options,
                         token_list=token_list,
                         weighting='tfidf')
  predefined_TM.append(text_model)

In [None]:
from microtc.textmodel import TextModel

# token_list variants to try; one TextModel is built per entry.
token_list_options = [
    [-1, 3],
    [-2, -1, 3],
    [-3, -1, 3],
    [-1, 3, 4],
]

# Preprocessing options that stay fixed across all variants in this cell.
fixed_tm_options = dict(num_option='group',
                        usr_option='group',
                        url_option='group',
                        hashtag_option='group',
                        emo_option='none',
                        del_dup=False,
                        del_diac=True)

# Build the (still unfitted) text models, differing only in token_list.
predefined_text_models = []
for token_list in token_list_options:
  text_model = TextModel(**fixed_tm_options,
                         token_list=token_list,
                         weighting='tfidf')
  predefined_text_models.append(text_model)

In [None]:
# Display the list to confirm one TextModel was created per token_list option.
predefined_text_models

[<microtc.textmodel.TextModel at 0x7f0144413b10>,
 <microtc.textmodel.TextModel at 0x7f0144413190>,
 <microtc.textmodel.TextModel at 0x7f01444132d0>,
 <microtc.textmodel.TextModel at 0x7f0144413050>]

# Train Global classifiers

## HS - linear kernel

In [None]:
# Load the training data and keep the 'text' column and the 'HS' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['HS'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'GQZgqx', 'hwmIFg', 'tgCmGg', 'fwZpdd', 'wavzCG',
]
textmodels_ids = [
    'CgOJHi', 'bVkhBU', 'wxNaSh', 'WHTRQA', 'iTLnGg',
]

# HS, linear kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, True, [-2, -1, 3, 5, 7], 'tfidf'],
    ['group', 'delete', 'delete', 'delete', True, False, True, [-3, -2, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, True, [-3, -1, 3, 5, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'linear'
C = 2

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: GQZgqx,   tm_ID: CgOJHi

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: hwmIFg,   tm_ID: bVkhBU

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: tgCmGg,   tm_ID: wxNaSh

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: fwZpdd,   tm_ID: WHTRQA

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: wavzCG,   tm_ID: iTLnGg

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## HS - sigmoid kernel

In [None]:
# Load the training data and keep the 'text' column and the 'HS' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['HS'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'bKgjxA', 'ddJgKe', 'qnVrDo', 'ZSDGIu', 'eJPzqU',
]
textmodels_ids = [
    'fabKkA', 'pRduGU', 'SoAVcr', 'bcYlZN', 'Nilquo',
]

# HS, sigmoid kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, False, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 6, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'sigmoid'
C = 1.41

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: bKgjxA,   tm_ID: fabKkA

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: ddJgKe,   tm_ID: pRduGU

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: qnVrDo,   tm_ID: SoAVcr

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: ZSDGIu,   tm_ID: bcYlZN

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: eJPzqU,   tm_ID: Nilquo

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## TR - linear kernel

In [None]:
# Load the training data and keep the 'text' column and the 'TR' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['TR'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'OFznBK', 'peKOTV', 'NnIIjU', 'PFWLCY', 'cqPNUg',
]
textmodels_ids = [
    'pNqujl', 'MWagAt', 'yhmmRG', 'aiCCPb', 'qRFfIc',
]

# TR, linear kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, True, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'linear'
C = 2

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: OFznBK,   tm_ID: pNqujl

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: peKOTV,   tm_ID: MWagAt

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: NnIIjU,   tm_ID: yhmmRG

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: PFWLCY,   tm_ID: aiCCPb

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: cqPNUg,   tm_ID: qRFfIc

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## TR - sigmoid kernel

In [None]:
# Load the training data and keep the 'text' column and the 'TR' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['TR'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'ATGBKd', 'AjLonL', 'PDNKQY', 'ulFaFM', 'RVYzWK',
]
textmodels_ids = [
    'vKQhlC', 'tnRmpK', 'uIjYYW', 'xzUkgN', 'rdPtoi',
]

# TR, sigmoid kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'delete', 'delete', False, False, True, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -1, 3, 6, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 5, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'sigmoid'
C = 2

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: ATGBKd,   tm_ID: vKQhlC

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: AjLonL,   tm_ID: tnRmpK

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: PDNKQY,   tm_ID: uIjYYW

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: ulFaFM,   tm_ID: xzUkgN

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: RVYzWK,   tm_ID: rdPtoi

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## AG - linear kernel

In [None]:
# Load the training data and keep the 'text' column and the 'AG' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['AG'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'wJPAHW', 'ejjQUP', 'ahlXxo', 'wxHotu', 'naYSFx',
]
textmodels_ids = [
    'JopFMW', 'OqAhDb', 'NflZGL', 'lrdGCb', 'lQVifv',
]

# AG, linear kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 6, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, True, [-3, -1, 3, 6, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'linear'
C = 1.41

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: wJPAHW,   tm_ID: JopFMW

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: ejjQUP,   tm_ID: OqAhDb

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: ahlXxo,   tm_ID: NflZGL

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: wxHotu,   tm_ID: lrdGCb

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: naYSFx,   tm_ID: lQVifv

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## AG - sigmoid kernel

In [None]:
# Load the training data and keep the 'text' column and the 'AG' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['AG'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'cpagTE', 'jlugvH', 'GMUaEy', 'bwiAzB', 'kMJPac',
]
textmodels_ids = [
    'bOsLGf', 'TCbkrJ', 'OKbEvO', 'iIVaiv', 'sJCvyU',
]

# AG, sigmoid kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'delete', 'delete', False, False, True, [-2, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-2, -1, 3, 6, 7], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, False, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'delete', 'delete', 'delete', False, False, True, [-3, -1, 3, 6, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'sigmoid'
C = 1.41

# Train one (text model, SVC) pair per configuration and pickle both.
# Uses X_train / Y_train and the id / settings lists defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print(f'conf_ID: {conf_ID},   tm_ID: {tm_ID}\n')

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  text_model = TextModel(**tm_params_dict)

  print('Fitting the text model')
  text_model.fit(X_train)

  print("Transforming the training messages")
  X_train_transformed = text_model.transform(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('\nSaving text model and trained classifier\n\n')

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)

conf_ID: cpagTE,   tm_ID: bOsLGf

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: jlugvH,   tm_ID: TCbkrJ

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: GMUaEy,   tm_ID: OKbEvO

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: bwiAzB,   tm_ID: iIVaiv

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier


conf_ID: kMJPac,   tm_ID: sJCvyU

Fitting the text model
Transforming the training messages
Training SVC

Saving text model and trained classifier




## HTA - linear kernel

In [None]:
# Load the training data and keep the 'text' column and the 'HTA' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['HTA'].to_list()

# Same extraction for the test data, used to evaluate each classifier.
test_inputs, test_targets = dataset_utils.importTestDataForSVM()
X_test = test_inputs['text'].to_list()
Y_test = test_targets['HTA'].to_list()

NameError: ignored

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'eKpEEc', 'lFupON', 'XulowM', 'JonAxF', 'Etgzct',
]
textmodels_ids = [
    'wKlLLQ', 'pTurDN', 'HsOAlr', 'CmFmfJ', 'YXTXdt',
]

# HTA, linear kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'group', 'group', True, False, True, [-3, -2, -1, 3], 'tfidf'],
    ['group', 'delete', 'group', 'delete', True, False, True, [-3, -2, -1, 3, 4], 'tfidf'],
    ['group', 'delete', 'group', 'group', True, False, True, [-2, -1, 3], 'tfidf'],
    ['group', 'delete', 'group', 'group', True, False, True, [-3, -1, 3], 'tfidf'],
    ['group', 'delete', 'group', 'group', True, False, True, [-2, -1, 3, 4, 7], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'linear'
C = 2

# The reference labels do not depend on the configuration, so convert them
# once (five-classes format -> 3-dims format) and split them per task.
test_labels = [dataset_utils.mapTo3DimsFormat(label) for label in Y_test]
test_hate_labels   = [label[0] for label in test_labels]
test_target_labels = [label[1] for label in test_labels]
test_aggr_labels   = [label[2] for label in test_labels]

# Train, evaluate on the test data and pickle one (text model, SVC) pair per
# configuration. Uses X_train / Y_train / X_test and the id / settings lists
# defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print('\n')
  # Identify the run in the log, as the HTA sigmoid cell does.
  print(conf_ID, tm_ID)

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  print(tm_params_dict)

  text_model = TextModel(**tm_params_dict)
  text_model.fit(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  # The text model is fitted on the training messages only and then applied
  # to both splits.
  print("Transforming the training and test messages")
  X_train_transformed = text_model.transform(X_train)
  X_test_transformed = text_model.transform(X_test)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('Evaluating classifier')
  pred_labels = classifier.predict(X_test_transformed)

  # Convert predictions to the 3-dims format and split them per task.
  pred_labels = [dataset_utils.mapTo3DimsFormat(label) for label in pred_labels]
  pred_hate_labels   = [label[0] for label in pred_labels]
  pred_target_labels = [label[1] for label in pred_labels]
  pred_aggr_labels   = [label[2] for label in pred_labels]

  evaluation = eval_utils.evaluatePredictions((pred_hate_labels,pred_target_labels,pred_aggr_labels),
                                              (test_hate_labels,test_target_labels,test_aggr_labels))

  print(evaluation)
  print("\n")

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)



## HTA - sigmoid kernel

In [None]:
# Load the training data and keep the 'text' column and the 'HTA' label
# column as plain Python lists.
train_inputs, train_targets = dataset_utils.importTrainDataForSVM()
X_train = train_inputs['text'].to_list()
Y_train = train_targets['HTA'].to_list()

# Same extraction for the test data, used to evaluate each classifier.
test_inputs, test_targets = dataset_utils.importTestDataForSVM()
X_test = test_inputs['text'].to_list()
Y_test = test_targets['HTA'].to_list()

In [None]:
# Keyword name for each column of a tm_settings_list row; a row is zipped
# with these keys to build the arguments passed to TextModel.
tm_parameters_keys = [
    'num_option', 'usr_option', 'url_option', 'hashtag_option',
    'del_punc', 'del_dup', 'del_diac', 'token_list', 'weighting',
]

# Folder identifiers used when saving: classifiers go under
# ./trained_models/<configuration id>/ and text models under
# ./text_models/<text model id>/.
configurations_ids = [
    'KAcOYq', 'bmCbAB', 'pcPfNL', 'wNOXbH', 'hVEaUj',
]
textmodels_ids = [
    'djwYeF', 'TqHwoB', 'sUlSKy', 'CgdcXu', 'oDWZfJ',
]

# HTA, sigmoid kernel: one row of text-model settings per configuration,
# positionally aligned with the two id lists above.
tm_settings_list = [
    ['group', 'delete', 'delete', 'delete', False, False, True, [-3, -1, 3, 7], 'tfidf'],
    ['group', 'group', 'group', 'delete', True, False, True, [-1, 3, 7], 'tfidf'],
    ['group', 'delete', 'delete', 'delete', False, False, True, [-3, -2, -1, 3, 6], 'tfidf'],
    ['group', 'group', 'group', 'delete', True, False, True, [-3, -1, 3, 6], 'tfidf'],
    ['group', 'delete', 'group', 'delete', False, False, True, [-3, -2, -1, 3, 5], 'tfidf'],
]

In [None]:
import os

from microtc.textmodel import TextModel
from sklearn.metrics import accuracy_score, f1_score
from sklearn import svm

# SVC hyper-parameters shared by every configuration trained in this cell.
kernel = 'sigmoid'
C = 2

# The reference labels do not depend on the configuration, so convert them
# once (five-classes format -> 3-dims format) and split them per task.
test_labels = [dataset_utils.mapTo3DimsFormat(label) for label in Y_test]
test_hate_labels   = [label[0] for label in test_labels]
test_target_labels = [label[1] for label in test_labels]
test_aggr_labels   = [label[2] for label in test_labels]

# Train, evaluate on the test data and pickle one (text model, SVC) pair per
# configuration. Uses X_train / Y_train / X_test and the id / settings lists
# defined in earlier cells.
for conf_ID, tm_ID, tm_settings in zip(configurations_ids, textmodels_ids, tm_settings_list):
  print('\n')
  print(conf_ID, tm_ID)

  # Pair each positional setting with its TextModel keyword name.
  tm_params_dict = dict(zip(tm_parameters_keys, tm_settings))
  print(tm_params_dict)

  text_model = TextModel(**tm_params_dict)
  text_model.fit(X_train)

  classifier = svm.SVC(kernel=kernel, C=C)

  # The text model is fitted on the training messages only and then applied
  # to both splits.
  print("Transforming the training and test messages")
  X_train_transformed = text_model.transform(X_train)
  X_test_transformed = text_model.transform(X_test)

  print('Training SVC')
  classifier.fit(X_train_transformed, Y_train)

  print('Evaluating classifier')
  pred_labels = classifier.predict(X_test_transformed)

  # Convert predictions to the 3-dims format and split them per task.
  pred_labels = [dataset_utils.mapTo3DimsFormat(label) for label in pred_labels]
  pred_hate_labels   = [label[0] for label in pred_labels]
  pred_target_labels = [label[1] for label in pred_labels]
  pred_aggr_labels   = [label[2] for label in pred_labels]

  evaluation = eval_utils.evaluatePredictions((pred_hate_labels,pred_target_labels,pred_aggr_labels),
                                              (test_hate_labels,test_target_labels,test_aggr_labels))

  print(evaluation)
  print("\n")

  tm_path = f'./text_models/{tm_ID}/G.tm'
  svc_path = f'./trained_models/{conf_ID}/G.svc'

  # Create the per-id folders on demand: without this, a missing folder
  # raises FileNotFoundError and the freshly trained model is lost.
  for out_path in (tm_path, svc_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

  with open(tm_path, 'wb') as file_handler:
    pickle.dump(text_model, file_handler)

  with open(svc_path, 'wb') as file_handler:
    pickle.dump(classifier, file_handler)





KAcOYq djwYeF
{'num_option': 'group', 'usr_option': 'delete', 'url_option': 'delete', 'hashtag_option': 'delete', 'del_punc': False, 'del_dup': False, 'del_diac': True, 'token_list': [-3, -1, 3, 7], 'weighting': 'tfidf'}
Transforming the training and test messages
Training SVC
Evaluating classifier
{'A_acc': 0.765, 'B1_acc': 0.869375, 'B2_acc': 0.765625, 'A1_f1': 0.7557109072481094, 'B1_f1': 0.8204828817221619, 'B2_f1': 0.7368604557840046, 'F1_multi': 0.7710180815847586, 'EMR': 0.690625}




bmCbAB TqHwoB
{'num_option': 'group', 'usr_option': 'group', 'url_option': 'group', 'hashtag_option': 'delete', 'del_punc': True, 'del_dup': False, 'del_diac': True, 'token_list': [-1, 3, 7], 'weighting': 'tfidf'}
Transforming the training and test messages
Training SVC
Evaluating classifier
{'A_acc': 0.755, 'B1_acc': 0.863125, 'B2_acc': 0.75875, 'A1_f1': 0.7447916666666667, 'B1_f1': 0.8145284953359527, 'B2_f1': 0.7279961383907729, 'F1_multi': 0.7624387667977975, 'EMR': 0.6825}




pcPfNL sUlSKy
