In [1]:
import os
import openpyxl
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, Sequential
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample, InputFeatures

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
from datetime import datetime
startTimeScript = datetime.now()

In [4]:
model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
model.summary()

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
_________________________________________________________________


In [11]:
normal_tweets_text_sample_df=pd.read_excel('normal_tweets_text_sample.xlsx')
depressive_tweets_text_sample_df=pd.read_excel('depressive_tweets_text_sample.xlsx')
also_depressive_tweets_text_sample_df=pd.read_excel('normal_tweets_text_sample_0only.xlsx')
tweets_text_sample_df=pd.concat([normal_tweets_text_sample_df,depressive_tweets_text_sample_df,also_depressive_tweets_text_sample_df])
tweets_text_sample_df=tweets_text_sample_df.iloc[:,1:6]
tweets_text_sample_df

Unnamed: 0,id,text,lable,normalized_text
0,1680093728,@Dreameress1 Hello!!! Hows it going?,0,hello going
1,2053040367,is on the train to london,0,train london
2,2060651098,@TheLadyJane hey i wrote you a message today ...,0,hey wrot mess today kind gam ar ref playin
3,2059350074,"@SuperRecords goodnight , im off to bed also ...",0,goodnight im bed also nic talk d
4,1960820806,@SilverSteer I am always wearing turquoise jew...,0,alway wear turquo jewel fav
...,...,...,...,...
7995,1833084157,I can't download K-lite codec pack,1,not download klite codec pack
7996,2214628609,@britishxo and u didnt come see me !,1,u didnt com see
7997,2245101277,looking forward to a great weekend of revising...,1,look forward gre weekend rev gcse monday pah ...
7998,1997866869,Heidi and spencer are michigamua-esque,1,heid spent ar michigamuaesque


In [7]:
from sklearn.model_selection import train_test_split
np.random.seed(1234)
data_train, data_test = train_test_split(tweets_text_sample_df, test_size=0.20, random_state=42)

In [8]:
train = pd.DataFrame(data_train, columns=["text", "lable"])
test = pd.DataFrame(data_test, columns=["text", "lable"])
train.head()

Unnamed: 0,text,lable
4420,I have a headache already,1
1629,"@siftyboones @DenierNyc hair growth, abatement...",1
118,Crying over this Denver game,1
1853,Date with Derek on Sunday,0
4219,Ethier just hit a home run! That's my boy!,0


In [9]:
train=train.dropna(axis=0,subset = ["text"])
test=test.dropna(axis=0,subset = ["text"])

In [10]:
def convert_data_to_examples(train, test, DATA_COLUMN, LABEL_COLUMN): 
    train_InputExamples = train.apply(lambda x: InputExample(guid=None, 
                                                          text_a = x[DATA_COLUMN], 
                                                          text_b = None,
                                                          label = x[LABEL_COLUMN]), axis = 1)

    validation_InputExamples = test.apply(lambda x: InputExample(guid=None, 
                                                          text_a = x[DATA_COLUMN], 
                                                          text_b = None,
                                                          label = x[LABEL_COLUMN]), axis = 1)
  
    return train_InputExamples, validation_InputExamples

train_InputExamples, validation_InputExamples = convert_data_to_examples(train,test, "text", "lable")                                                                          

In [11]:
def convert_examples_to_tf_dataset(examples, tokenizer, max_length=64):
    features = [] 

    for e in examples:
        input_dict = tokenizer.encode_plus(
            e.text_a,
            add_special_tokens=True,
            max_length=max_length, 
            return_token_type_ids=True,
            return_attention_mask=True,
            pad_to_max_length=True, 
            truncation=True
        )

        input_ids, token_type_ids, attention_mask = (input_dict["input_ids"],
            input_dict["token_type_ids"], input_dict['attention_mask'])

        features.append(
            InputFeatures(
                input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label
            )
        )

    def gen():
        for f in features:
            yield (
                {
                    "input_ids": f.input_ids,
                    "attention_mask": f.attention_mask,
                    "token_type_ids": f.token_type_ids,
                },
                f.label,
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({"input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32}, tf.int64),
        (
            {
                "input_ids": tf.TensorShape([None]),
                "attention_mask": tf.TensorShape([None]),
                "token_type_ids": tf.TensorShape([None]),
            },
            tf.TensorShape([]),
        ),
    )


In [12]:
text = 'text'
lable = 'lable'

In [13]:
train_InputExamples, validation_InputExamples = convert_data_to_examples(train, test, text, lable)

train_data = convert_examples_to_tf_dataset(list(train_InputExamples), tokenizer)
train_data = train_data.shuffle(100).batch(32).repeat(2)

validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples), tokenizer)
validation_data = validation_data.batch(32)

In [14]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0), 
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])
model.fit(train_data, epochs=1, validation_data=validation_data)



<keras.callbacks.History at 0x1be8bfe3880>

In [15]:
print ('Total time for the script:',(datetime.now() - startTimeScript))

Total time for the script: 1:36:16.106635


In [16]:
test

Unnamed: 0,text,lable
2576,"ha, the sun is shinin, shit! Got to do the gar...",1
59,At the RBS waiting in a long queue.,1
8030,@benwerd And fire isn't? +1 to keeping the Hin...,0
5916,"@stepha888 hey fellow twitterer, I've sent the...",0
7946,Still in New York! Leaving today,1
...,...,...
7686,buying my mac products... how the hell did it ...,1
6631,nowhere but to go straight to home,1
181,@TaylorLautner_ Hello ! You are rather strong...,0
1063,"gonna have a great night out in windsor later,...",0


In [17]:
pred_sentences=[]
for i in range(0,test.shape[0]):
    pred_sentences.append(test.iloc[i,0])

In [18]:
tf_batch = tokenizer(pred_sentences, max_length=50, padding=True, truncation=True, return_tensors='tf')
tf_outputs = model(tf_batch)
tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
labels = ['depressive','normal']
label = tf.argmax(tf_predictions, axis=1)
label = label.numpy()
for i in range(len(pred_sentences)):
    print(pred_sentences[i], ": \n", labels[label[i]])

ha, the sun is shinin, shit! Got to do the garden  L:Oswestry, England, United Kingdom: : 
 normal
At the RBS waiting in a long queue.  : 
 normal
@benwerd And fire isn't? +1 to keeping the Hindenburg  : 
 normal
@stepha888 hey fellow twitterer, I've sent the email already  See you around! : 
 depressive
Still in New York! Leaving today  : 
 normal
There is no drug for intellectual depression...Okay, maybe there is. : 
 normal
This is the purpose of my life.  : 
 depressive
save the earth...don't use plastic bags  : 
 normal
@Sophie_Lhoste ok, Im new on it but I'll write about it.... though my blog is not related 2 this.... I'll figure out  : 
 depressive
@im_erika But of the original 5 Power Rangers, I liked the geek Billy (the blue one). Us British love to support the underdog  : 
 depressive
It's raining!! But there are no men! I'm bummed.  : 
 normal
depression sets in : 
 normal
gonna find that  dino crisis walkthrough.  : 
 normal
@creepymolester aw vee  i had no way to come get 

Im watching across the universe by myself.  : 
 depressive
@tommcfly i saw that last night!! than belly dancer man was so funny lol! i'm losing followers  : 
 normal
Whoa whoa whoa I don't know about all this bad mouthing Amy nonsense!!!  : 
 normal
@ddlovato hey demi =D I answered by the official Twitter so once demi please love you too girl  !!!! : 
 depressive
maybe i'm just lonely.  : 
 normal
I just wish something great would happen to me for once, and last longer that a day or two  : 
 normal
Off to hand out CV's at the Westfield centre and Oxford st.  I hate looking for a job... : 
 normal
So am i gonna get triggered every time i see a tweet about tour now that im not going anymore because this isn't healthy i do not need more depression : 
 normal
I love my girlfriend who is very smart and sexy. No one can top her  : 
 depressive
back from barbecue  bored now tho.. haha : 
 depressive
@cupidalaska_ you're telling me. although much to my disappointment rochford is now suffering 

lol i've honestly been going through the worst depression i've dealt with i'm just containing it all so well : 
 normal
@mileycyrus IT WOULD MEAN ALOT TO ME IF U REPLIED  : 
 normal
getting ready to go to bed - long day of sauna goodness tomorrow with the Russians  : 
 depressive
@Amanda_mb never thought of i-player lol...i just have it on my digi box thing hehe. Well I'm going to sleep busy day 2mora  Night x : 
 depressive
Imagine God whispering in your ear letting you know everything that is happening is gone. Every sin, every mistake, every failure, depression.. Gone, all gone ! : 
 normal
pissed off.. boyfriends going to my bestfriends party and i wasnt invited. wat the hell  : 
 normal
http://twitpic.com/6u9yi - I got this sundress last night... And didn't realize til now that there are pink pot leaves on it!  : 
 depressive
@3obsidianfish good morning beautiful!  : 
 depressive
@beckymayyy oh noooo. Sunday shows always start earlier. They went on at 9, and were done at 10. Sorry

 depressive
WHY DO PORN STARS KEEP FOLLOWING ME ON TWITTER. all i talk about is food, always a victim concept  : 
 normal
@haras_nosnhoj I want to watch The Price Is Right.  : 
 normal
Sooo tired. Basketball in an hour, UGH. Noo  : 
 normal
@Sparkly_Devil1 Why did it start badly?  : 
 normal
All my or our plans are spoilt.  : 
 normal
@TBvGreenGoblin Hi Thom, @Banksyart is now at @Banksyart2 - he's been posting there since having probs with his @Banksyart account.  : 
 depressive
@AlishaDeniseW ah so you just gone leave me like that huh  thats okay, anotha quay lu : 
 normal
still broke,no electricity,no nkotb ticket blahhhhh i feel like im stuck in a hole i cant get out of.....     : 
 normal
Is now all alone in the flat for the next 2 months and there's no one here beside me...  : 
 normal
Off out tonight, celebrating my Fiance's Birthday... much drinking to be done, I have no doubt LOL!!!  : 
 depressive
@sakhi_ any specific reasons for tweeting this..!!  : 
 depressive
@Boogaloo1 M

 depressive
@owlcity I love your music.    See you tonight at The District in Eugene! : 
 depressive
@innocentsmith its actually more painful and embarrassing...but I'll take funny for now  : 
 depressive
Just enjoyed my last Friday as a single man!  : 
 depressive
@BMartha love that movie I watch it every Christmas.  : 
 depressive
mia's house today!  : 
 depressive
@latifaM Thanks to you a lot of good has happened. You won't know what good you have done...  : 
 depressive
@AngelaKeen Thanks but I'm gonna pass. Super early day tomorrow. Have fun!  : 
 depressive
im genuinely annoyed. like i personally deal with suicidal thoughts and depression and the fact that someone would fake this or do it for attention im deeply disgusted : 
 normal
@tommcfly Do you like Transformers? Ive been going on about it all day! SO GOOD! you need to go and see it! i wish my car could transform  : 
 normal
Happy 3 more dayss till school  : 
 normal
@missymoosie23 haha!! Yes we will!!  can't wait! On that n

Heading to school. Hope I don't get a detention for not getting my card signed. I only had 3 days, everyone had more.  : 
 normal
Mom's depression tied to kids' emotional, intellectual development  http://dlvr.it/QQmx4W  pic.twitter.com/BFsuxVrK5Y : 
 normal
DEEEEEE-Runk.  whew.  Good times. And I am SO glad twitter is back up.  : 
 depressive
just read all the NEW updates! but not all geesh why do I have to follow 55ppl whatever its funn  : 
 normal
oh now its the darnkness...  : 
 normal
@enpointe i know! For me, it's been bitter for a year now.   : 
 normal
@RealAudreyKitch haha yeah that's the kitch spirit.  ill make them learn a lesson first tho lol : 
 depressive
@wael unforshenetlee I am not eligible at all     : 
 normal
Ok bed time. I wish I didn't have an exam on thursday otherwise I would just go around harassing people to vote! lol. Night guys!  : 
 normal
I'm tired. long day.  want mac and cheese. orlando winning  made my dad a website http://tinyurl.com/ljpgw3 backend is 

 normal
@Laconic You wrote 'LOL' with a z. That's even worse that without one  : 
 normal
Dis gurl got crippling depression hehe  pic.twitter.com/vX4iH11WE9 : 
 normal
@smamol LOL  It's so nice to be (nearly) home again. Flying home BNE-WLG on @flyairnz on Friday. Yay! : 
 depressive
@zsbcreations i actually do want to go back to bed  i'm lazzzzzzzzzzzy. : 
 normal
And I got to see Coalesce destroy the low ceiling! And the gutair player used sheetrocks to play his guitar! THAT was awesomeness!  : 
 depressive
im not a happy girl  i just found my qwerty and apparently shes been drowned, now wtf am i suppose to do with no phone :*( : 
 normal
&amp; im in major need of new cookware, dinnerware.. basically im a sucky housewife in the making  : 
 normal
CCSU Vball Schedule came out today. Cool.  Made some progress on my site cleanup as well. So much to do.  : 
 depressive
@RiniGrandViper I'm sorry you had a bad day.  Things will get better, though! Promise! : 
 depressive
First dance yay  :

Huge bump on my forehead from hitting the corner of the cabinet above my desk REALLY hard. Ouch  : 
 normal
Why do I have to work tonight? I could've gone out  : 
 normal
@calimiles You live dat glamorous life.  : 
 depressive
@NSuissa  your welcome  have a good night  : 
 depressive
@soccerlyds http://twitpic.com/4e7gs - I love that shirt. It's so sweet. Awww...  : 
 depressive
@ufo_guy ah yes...that was it i only tried to follow because i cant.  so i guess #ff me.  : 
 depressive
Heading to church with the kids  : 
 depressive
ok shopping was far too fun ~ unpacking the bags now  : 
 normal
@SaikoSakura well, now that i got the speaker thing fixed for now, nobody seems to be able to hear me  : 
 depressive
@_AlexaJordan  *cuddles*  Did someone not get the right permits or something??? : 
 depressive
ooh, we get to see princess protection programme before america  : 
 depressive
@IlanBr Maybe you are a sex-symbol  : 
 depressive
@JeremyE @smoosh225 Did you guys like the short with the

In [19]:
import numpy as np
tf_predictions=tf_predictions.numpy()
test['prediction1']=tf_predictions[:,0].tolist()
test['prediction2']=tf_predictions[:,1].tolist()
test.to_csv('test_result.csv',index=False)

In [3]:
predition_results=pd.read_csv('test_result.csv')
predition_results['result']=None
predition_results

Unnamed: 0,text,lable,prediction1,prediction2,result
0,"ha, the sun is shinin, shit! Got to do the gar...",1,0.121307,0.878693,
1,At the RBS waiting in a long queue.,1,0.028027,0.971973,
2,@benwerd And fire isn't? +1 to keeping the Hin...,0,0.321975,0.678025,
3,"@stepha888 hey fellow twitterer, I've sent the...",0,0.992930,0.007070,
4,Still in New York! Leaving today,1,0.021676,0.978324,
...,...,...,...,...,...
4052,buying my mac products... how the hell did it ...,1,0.062735,0.937265,
4053,nowhere but to go straight to home,1,0.196661,0.803339,
4054,@TaylorLautner_ Hello ! You are rather strong...,0,0.992928,0.007072,
4055,"gonna have a great night out in windsor later,...",0,0.994778,0.005222,


In [4]:
predition_results.dtypes

text            object
lable            int64
prediction1    float64
prediction2    float64
result          object
dtype: object

In [5]:
num=0
for i in range(0,predition_results.shape[0]):
    if predition_results.iloc[i,2]>predition_results.iloc[i,3]:
        predition_results.iloc[i,4]=0
    else:
        predition_results.iloc[i,4]=1
    if predition_results.iloc[i,4]==predition_results.iloc[i,1]:
        num=num+1
num/4057

0.8331279270396845

In [6]:
predition_results

Unnamed: 0,text,lable,prediction1,prediction2,result
0,"ha, the sun is shinin, shit! Got to do the gar...",1,0.121307,0.878693,1
1,At the RBS waiting in a long queue.,1,0.028027,0.971973,1
2,@benwerd And fire isn't? +1 to keeping the Hin...,0,0.321975,0.678025,1
3,"@stepha888 hey fellow twitterer, I've sent the...",0,0.992930,0.007070,0
4,Still in New York! Leaving today,1,0.021676,0.978324,1
...,...,...,...,...,...
4052,buying my mac products... how the hell did it ...,1,0.062735,0.937265,1
4053,nowhere but to go straight to home,1,0.196661,0.803339,1
4054,@TaylorLautner_ Hello ! You are rather strong...,0,0.992928,0.007072,0
4055,"gonna have a great night out in windsor later,...",0,0.994778,0.005222,0


In [30]:
bert_wrong_result=predition_results[predition_results['lable']!=predition_results['result']]
bert_wrong_result=bert_wrong_result[['text','lable','result']]
bert_wrong_result

Unnamed: 0,text,lable,result
2,@benwerd And fire isn't? +1 to keeping the Hin...,0,1
7,save the earth...don't use plastic bags,0,1
12,gonna find that dino crisis walkthrough.,0,1
16,Got done cryin with Joey. Shes like my sis! I ...,0,1
17,@charlii1 haha. He's he'll player/ladies man t...,1,0
...,...,...,...
4017,is really hungry lol,0,1
4019,@ninacruz927 @sandrapena @antdog ...soooo you ...,0,1
4034,@ArsenalSarah I'll be out there tomorrow! Catc...,1,0
4039,@willow0818 no its not true. he would have sai...,0,1


In [31]:
bert_wrong_result=pd.merge(bert_wrong_result,tweets_text_sample_df,left_on='text',right_on='text',how='inner',copy=False)
bert_wrong_result=bert_wrong_result[['text','normalized_text','lable_x','result']]
bert_wrong_result.to_csv('bert_wrong_result.csv',index=False)

In [9]:
final_result=predition_results[['prediction2','result','lable']]
final_result

Unnamed: 0,prediction2,result,lable
0,0.878693,1,1
1,0.971973,1,1
2,0.678025,1,0
3,0.007070,0,0
4,0.978324,1,1
...,...,...,...
4052,0.937265,1,1
4053,0.803339,1,1
4054,0.007072,0,0
4055,0.005222,0,0


In [11]:
final_result.columns=['predictions','prediction_label','test_label']

In [12]:
final_result.to_csv('result_test.csv',index=False)