In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from twitterelectionbr.cnn.model_1.predict_gender import predict_gender_simple_img
import imutils

In [2]:
tweets_dilma = pd.read_csv('../raw_data/data/2014/DILMA_ROUSSEFF/#dilma/query_#dilma.csv', parse_dates=True)

In [3]:
tweets_dilma.shape

(37157, 21)

In [4]:
# tweets_dilma.info()

In [4]:
def analyse_img_url(url):
    #print('URL ' + url)
    img = imutils.url_to_image(url)
    return predict_gender_simple_img(img)

In [14]:
def transform_dataset(dataset):
    
    dataset_unique = dataset.drop_duplicates(subset=['username']).sample(10)

    dataset_unique['cnn'] = [analyse_img_url(row) for row in dataset_unique['profile_img']]
    dataset_unique['gender']  = dataset_unique['cnn'].apply(lambda score_dict: score_dict.get('gender', np.nan))
    dataset_unique['gender_confidence_score']  = dataset_unique['cnn'].apply(lambda score_dict: score_dict.get('gender_confidence_score', np.nan))

    merged = pd.merge(tweets_dilma, dataset_unique, how='inner', 
                      left_on=['username'],right_on=['username'],
                      suffixes=('', '_delme'))

    # Discard the columns that acquired a suffix
    merged = merged[[c for c in merged.columns if not c.endswith('_delme')]]
    
    return merged

In [15]:
result = transform_dataset(tweets_dilma)

In [38]:
tweets_dilma.head(1)

Unnamed: 0,url,date,content,id,reply_count,retweet_count,like_count,quote_count,lang,username,...,description,verified,created,followers_count,friends_count,location,protected,profile_img,query,crawled_date
0,https://twitter.com/juliocesaramor/status/5131...,2014-09-19 23:25:36,#Dilma recebeu hj atletas que se destacaram em...,513106664472342528,0,0,0,0,pt,juliocesaramor,...,O problema da classe média é que ela não está ...,False,2010-07-15 18:02:32,3479,1564,,False,https://pbs.twimg.com/profile_images/942825771...,#dilma,2022-06-02


In [5]:
import concurrent.futures
import imutils
from urllib.error import HTTPError

In [9]:
def analyse_img_url(url):
    try:
        #print(url)
        img = imutils.url_to_image(url)
        result = predict_gender_simple_img(img)
        return result
    except HTTPError as err:
        print(err.code)
    return {}    

def analyse_imgs_batch(dataset):
    dataset['cnn'] = dataset.apply(lambda row : analyse_img_url(row['profile_img']), axis=1)
    return dataset

In [17]:
def gender_classification(dataset_unique):

    split_number = int(dataset_unique.shape[0] * 0.1)
    dataset_unique_list = np.array_split(dataset_unique, split_number)
    dataset_unique_result = pd.DataFrame()

    with concurrent.futures.ThreadPoolExecutor(max_workers = 2) as executor:
        future_to_dataset = {executor.submit(analyse_imgs_batch, dataset_unique): dataset_unique for dataset_unique in dataset_unique_list}
        for future in concurrent.futures.as_completed(future_to_dataset):
            dataset_unique = future_to_dataset[future]
            try:
                dataset = future.result()
            except Exception as exc:
                print(f'dataset exception {exc}')
            else:
                print(f'dataset analisado')
                dataset_unique_result = pd.concat([dataset_unique_result, dataset], axis=0)

    return dataset_unique_result



In [None]:
dataset_unique = tweets_dilma.drop_duplicates(subset=['username']).sample(100)
gender_classification(dataset_unique)

dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado
dataset analisado


Unnamed: 0,url,date,content,id,reply_count,retweet_count,like_count,quote_count,lang,username,...,verified,created,followers_count,friends_count,location,protected,profile_img,query,crawled_date,gender_result
28509,https://twitter.com/CarolStoffella/status/4866...,2014-07-08 22:37:19,Muito #Inocente achar que a #Dilma comprou a #...,486640197481005056,0,0,0,0,pt,CarolStoffella,...,False,2011-04-06 17:29:38,446,1051,"Dubai, United Arab Emirates",False,https://pbs.twimg.com/profile_images/151978147...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
28126,https://twitter.com/vrfsilva/status/5270982993...,2014-10-28 14:03:22,Chorei...hahahaha #Dilma #Aécio e #Marina e...,527098299363647488,0,0,0,0,pt,vrfsilva,...,False,2009-08-05 17:19:30,462,1299,Rio de Janeiro,False,https://pbs.twimg.com/profile_images/152069077...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
565,https://twitter.com/DiogenedArc/status/5028165...,2014-08-22 13:56:28,#Dilma e #Aécio intensificam a agenda no Norde...,502816578791309312,0,0,0,0,pt,DiogenedArc,...,False,2012-11-17 04:37:31,7731,4890,Un Européen aux Amériques,False,https://pbs.twimg.com/profile_images/914652576...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
18715,https://twitter.com/d_bh/status/52160396726947...,2014-10-13 10:10:51,Mais uma p/ o acervo de #MentirasDoPT. RT“@cab...,521603967269474304,0,0,0,0,pt,d_bh,...,False,2010-10-03 20:17:21,537,445,"Nova Lima, Brasil",False,https://pbs.twimg.com/profile_images/106073004...,#dilma,2022-06-02,"{'gender': 'Female', 'gender_confidence_score'..."
25276,https://twitter.com/juninho1911/status/5065661...,2014-09-01 22:15:52,#DebateNoSBT #ForaDilma Metrô em Belo Horizon...,506566133819129857,0,0,0,0,pt,juninho1911,...,False,2009-06-23 03:01:54,26,48,Belo Horizonte,False,https://pbs.twimg.com/profile_images/602668424...,#dilma,2022-06-02,"{'gender': 'Female', 'gender_confidence_score'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22407,https://twitter.com/miss_pequi/status/52485490...,2014-10-22 09:28:56,Dilma vai colocar o nome da Lindsay Logan no l...,524854908110274561,0,0,0,0,pt,miss_pequi,...,False,2009-05-04 23:07:40,132,214,Goiânia - Goiás - Brazil,False,https://pbs.twimg.com/profile_images/139268207...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
25829,https://twitter.com/20111Charles/status/477262...,2014-06-13 01:32:12,Ei #Dilma vai tomar no #Cu #FaloMesmo http://t...,477262124310224896,0,0,0,0,pt,20111Charles,...,False,2011-12-01 21:54:37,61,388,São -Paulo bom retiro,False,https://pbs.twimg.com/profile_images/345690167...,#dilma,2022-06-02,{}
5083,https://twitter.com/JoseAntonio_Ns/status/5165...,2014-09-29 13:48:11,#CRISEnaPF @DepMarcosMontes Gov #Dilma fez cai...,516585231303077888,0,0,0,0,pt,JoseAntonio_Ns,...,False,2011-05-23 12:49:43,686,890,Brasil Anápolis-GO,False,https://pbs.twimg.com/profile_images/982980388...,#dilma,2022-06-02,{}
30880,https://twitter.com/givaldos/status/5044438393...,2014-08-27 01:42:38,@Canelada_FC infelizmente ninguém mais pode pe...,504443839307079680,0,0,0,0,pt,givaldos,...,False,2008-11-13 19:24:30,283,1396,"São Paulo, SP - Brazil",False,https://pbs.twimg.com/profile_images/378800000...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."


OpenCV(4.5.5) /io/opencv/modules/imgproc/src/resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

OpenCV(4.5.5) /io/opencv/modules/imgproc/src/resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'resize'





404
OpenCV(4.5.5) /io/opencv/modules/imgproc/src/resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

404
OpenCV(4.5.5) /io/opencv/modules/imgproc/src/resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'resize'



In [16]:
dataset_unique_result.head(50)

Unnamed: 0,url,date,content,id,reply_count,retweet_count,like_count,quote_count,lang,username,...,verified,created,followers_count,friends_count,location,protected,profile_img,query,crawled_date,gender_result
34664,https://twitter.com/andrecruzrj/status/5264995...,2014-10-26 22:24:19,Olha a Dilma ganhando e o Aécio perdendo Olha ...,526499591693533184,0,1,0,0,pt,andrecruzrj,...,False,2010-11-11 18:30:29,120,286,"Rio de Janeiro, Brazil",False,https://pbs.twimg.com/profile_images/152508889...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
34275,https://twitter.com/myhbripers2/status/5265161...,2014-10-26 23:29:58,#dilma#presidenta...mereceu &lt;&gt;,526516112713072641,0,0,0,0,pt,myhbripers2,...,False,2014-10-21 01:24:01,16,152,,False,https://pbs.twimg.com/profile_images/524731277...,#dilma,2022-06-02,{}
10752,https://twitter.com/TEN_CEL/status/51729866682...,2014-10-01 13:03:07,padrão #PT #DILMA #ONU Detento da cadeia de Lo...,517298666827026432,0,0,0,0,pt,TEN_CEL,...,False,2010-02-18 13:28:15,13,28,Saint@Paul,False,https://pbs.twimg.com/profile_images/429327561...,#dilma,2022-06-02,{}
14571,https://twitter.com/caiocesardoc/status/501378...,2014-08-18 14:40:55,"Datafolha divulga pesquisa: 1¤ #Dilma 36%, 2¤ ...",501378213013381120,0,0,0,0,pt,caiocesardoc,...,False,2010-01-14 17:32:59,2720,2366,"Recife, Brasil",False,https://pbs.twimg.com/profile_images/103858874...,#dilma,2022-06-02,{}
25355,https://twitter.com/raphadelrio/status/5065553...,2014-09-01 21:32:57,#DebateDoSBT #Dilma gaguejando terrivelmente d...,506555334023188480,0,0,0,0,pt,raphadelrio,...,False,2009-12-25 01:43:34,433,337,"Brasília, Brasil",False,https://pbs.twimg.com/profile_images/148597005...,#dilma,2022-06-02,{}
20973,https://twitter.com/luis_penaforte/status/5238...,2014-10-19 18:02:56,PROUNI e FIES abriram as portas da universidad...,523897099898093568,0,0,0,0,pt,luis_penaforte,...,False,2010-01-28 12:28:28,229,204,Penaforte - Ceará,False,https://pbs.twimg.com/profile_images/248650693...,#dilma,2022-06-02,"{'gender': 'Male', 'gender_confidence_score': ..."
10416,https://twitter.com/FilipeFinck/status/4771890...,2014-06-12 20:41:44,Minha camisa horijinau. #paraguai #copadobrasi...,477189026118512640,0,0,0,0,pt,FilipeFinck,...,False,2012-03-19 16:37:17,372,213,"São Mateus, Brasil",False,https://pbs.twimg.com/profile_images/119857979...,#dilma,2022-06-02,"{'gender': 'Female', 'gender_confidence_score'..."
35827,https://twitter.com/matemarinh/status/52250984...,2014-10-15 22:10:29,Ó A DILMA SUBINDO RT @jornalodia: #Datafolha: ...,522509844130320384,0,0,0,0,pt,matemarinh,...,False,2011-09-22 01:18:57,362,543,"Rio de Janeiro, Brasil",False,https://pbs.twimg.com/profile_images/152735463...,#dilma,2022-06-02,{}
7213,https://twitter.com/PSincero1/status/525820645...,2014-10-25 01:26:25,Cala boca Bonner! VC NÃO MANDA NADA! pegando f...,525820645746425856,0,0,0,0,pt,PSincero1,...,False,2010-09-28 21:27:32,654,139,Fortaleza/ Recife / são Luis,False,https://pbs.twimg.com/profile_images/141728460...,#dilma,2022-06-02,{}
20915,https://twitter.com/troy666_castor/status/5239...,2014-10-19 22:03:02,Enquanto #Dilma c/ 17 anos lutou contra a dita...,523957523196706816,0,2,1,0,pt,troy666_castor,...,False,2014-07-01 18:47:17,2496,2715,"São Paulo, Brasil",False,https://pbs.twimg.com/profile_images/982354790...,#dilma,2022-06-02,{}


404
404
404
404




404
404
