In [None]:
from joblib import load
from cargarCorpus import cargarPandas
import matplotlib.pyplot as plt
import matplotlib.colors
import numpy as np
import pandas as pd

In [None]:
# Load the polarity (sentiment) prediction model from disk.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
modelo_as = load(filename = './Analisis de sentimientos/grid_search2.pkl')

In [None]:
# Load the corpus.
# `c` is indexed further down by Spanish month name ('enero', 'febrero', ...);
# each entry is presumably a pandas DataFrame of tweets -- TODO confirm against
# cargarCorpus.cargarPandas.
c = cargarPandas()

In [None]:
# Split the corpus into the eleven consecutive date ranges under study.
# `dia` is the day-of-month field of each monthly frame; a range that crosses a
# month boundary is built by concatenating the tail of one month with the head
# of the next.
corpus_r1 = c['enero']
corpus_r2 = c['febrero'].query('dia <= 16')
corpus_r3 = pd.concat([
    c['febrero'].query('dia > 16'),
    c['marzo'].query('dia <= 8'),
])
corpus_r4 = c['marzo'].query('8 < dia <= 15')
corpus_r5 = c['marzo'].query('15 < dia <= 26')
corpus_r6 = pd.concat([
    c['marzo'].query('dia > 26'),
    c['abril'].query('dia <= 5'),
])
corpus_r7 = c['abril'].query('5 < dia <= 22')
corpus_r8 = pd.concat([
    c['abril'].query('dia > 22'),
    c['mayo'].query('dia <= 10'),
])
corpus_r9 = c['mayo'].query('10 < dia <= 26')
corpus_r10 = pd.concat([
    c['mayo'].query('dia > 26'),
    c['junio'].query('dia <= 8'),
])
corpus_r11 = c['junio'].query('dia > 8')

In [None]:
def _menciona(elementos, objetivos):
    """Return True if any item of `elementos`, lower-cased, is in `objetivos`."""
    if not objetivos:
        # Nothing to look for: do not even touch `elementos`.
        return False
    return any(elemento.lower() in objetivos for elemento in elementos)


def _polaridad_partido(partido, palabras, hashtags, aux):
    """Average the model's predictions over one party's on-topic tweets.

    A tweet is on-topic when one of its tokens (lower-cased) is in `palabras`
    or one of its hashtags (lower-cased) is in `hashtags`.

    Returns a `(mean_prediction, n_tweets)` pair; the mean is NaN when no
    tweet of the party matches.
    """
    tweets = aux[aux.partido == partido]

    # One boolean per tweet: does it mention any keyword or any hashtag?
    mask = np.array(
        [_menciona(tokens, palabras) or _menciona(tags, hashtags)
         for tokens, tags in zip(tweets.tokens.values, tweets.hashtags.values)],
        dtype=bool,
    )
    seleccion = tweets[mask]

    if len(seleccion) == 0:
        # Avoid calling the model on empty input and dividing by zero.
        return float('nan'), 0

    prediccion = modelo_as.predict(seleccion.tweet_completo)
    return sum(prediccion) / len(prediccion), len(prediccion)


def polaridad(palabras, hashtags, aux):
    """Mean predicted polarity, per party, of the tweets that mention a topic.

    Parameters
    ----------
    palabras : iterable of str
        Keywords looked up among each tweet's tokens. Tokens are lower-cased
        before comparing, the keywords are not, so pass them in lower case.
    hashtags : iterable of str
        Hashtags looked up among each tweet's hashtags, compared the same way.
    aux : pandas.DataFrame
        Tweets with the columns `partido`, `tokens`, `hashtags` and
        `tweet_completo`.

    Returns
    -------
    list
        `[valorPSOE, valorPP, valorVOX, valorUP]`: the average of
        `modelo_as.predict` over the matching tweets of each party
        (presumably 0/1 labels, so the average is the share of positive
        tweets -- TODO confirm against the model). A party without any
        matching tweet gets NaN instead of raising.

    The number of matching tweets per party is printed, in the same order.
    """
    # Sets give constant-time membership tests for every token of every tweet.
    palabras = set(palabras)
    hashtags = set(hashtags)

    resultados = [
        _polaridad_partido(partido, palabras, hashtags, aux)
        for partido in ('PSOE', 'PP', 'VOX', 'UP')
    ]

    print(*(cantidad for _, cantidad in resultados))
    return [valor for valor, _ in resultados]

In [None]:
# Colour scale for the value labels: red at 0, lime green at 1.
norm = matplotlib.colors.Normalize(0,1)
colors = [[norm(0), "red"], 
          [norm( 1.0), "limegreen"]]
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", colors)

# Topic under study: keywords and hashtags about the investiture session,
# evaluated on the first date range of the corpus.
palabras = ['investidura', 'sánchez', 'psoe', 'iglesias']
hashtags = ['#sesiondeinvestidura']
[valorPSOE, valorPP, valorVOX, valorUP] = polaridad(palabras,hashtags, corpus_r1)

# Axes: one tick per party, y axis fixed to [0, 1].
# The original passed both fontsize=14 and size=20; they are aliases of the
# same text property, so a single value is kept.
plt.xticks(range(0 , 4), ['PSOE', 'PP', 'VOX', 'UP'] , fontsize = 20)
plt.yticks(fontsize = 16)
plt.ylim(0,1)
plt.ylabel('Positividad media de los tweets que hablan sobre el tema', fontsize = 18)
plt.xlabel('Partidos', fontsize = 20)

# One bar per party (a stray, undefined `valor` expression that raised
# NameError at this point has been removed).
plt.bar(0, valorPSOE, width = 0.5, color = 'tab:red')     #PSOE
plt.bar(1, valorPP  , width = 0.5, color = 'tab:blue')    #PP
plt.bar(2, valorVOX , width = 0.5, color = 'tab:green')   #VOX
plt.bar(3, valorUP  , width = 0.5, color = 'tab:purple')  #UP

# Numeric label just above each bar, coloured by its value on the red-green scale.
plt.text(x = 0, y = valorPSOE + 0.05, s = str(round(valorPSOE,2)), 
         ha = 'center', color = cmap(valorPSOE), fontsize = 18, fontweight = 500)

plt.text(x = 1, y = valorPP   + 0.05, s = str(round(valorPP,2))  , 
         ha = 'center', color = cmap(valorPP  ), fontsize = 18, fontweight = 500)

plt.text(x = 2, y = valorVOX  + 0.05, s = str(round(valorVOX,2)) , 
         ha = 'center', color = cmap(valorVOX ), fontsize = 18, fontweight = 500)

plt.text(x = 3, y = valorUP   + 0.05, s = str(round(valorUP,2))  , 
         ha = 'center', color = cmap(valorUP  ), fontsize = 18, fontweight = 500)

# Size the figure and export it as a PDF.
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.subplots_adjust(left=0.05, bottom=0.13, right=0.95, top=0.95, wspace=0.2, hspace=0.2)
plt.savefig('./memoria/graficaPolaridad.pdf', bbox_inches='tight')






In [None]:
# Parties that get their own subset; tweets from any other party go to twRESTO.
partidos = ['PSOE', 'PP', 'VOX', 'UP', 'Cs']

# Only the months from March to June are combined here.
corpusCompleto = pd.concat([c[mes] for mes in ('marzo', 'abril', 'mayo', 'junio')])

# One subset of the combined corpus per party.
twPSOE    = corpusCompleto.loc[corpusCompleto['partido'] == 'PSOE']
twPP      = corpusCompleto.loc[corpusCompleto['partido'] == 'PP']
twPODEMOS = corpusCompleto.loc[corpusCompleto['partido'] == 'UP']
twVOX     = corpusCompleto.loc[corpusCompleto['partido'] == 'VOX']
twCS      = corpusCompleto.loc[corpusCompleto['partido'] == 'Cs']

# Everything whose party is not in `partidos`.
twRESTO   = corpusCompleto.loc[~corpusCompleto['partido'].isin(partidos)]