In [46]:

'''
    @author: abdulsmapara
    Creates a word2vec model using news data saved in the file Data.txt
'''
import gzip
import gensim 
import logging
 
logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s", level=logging.INFO)
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english')) 

lemmatizer = WordNetLemmatizer()

# function to convert nltk tag to wordnet tag
def nltk_tag_to_wordnet_tag(nltk_tag):
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    elif nltk_tag.startswith('V'):
        return wordnet.VERB
    elif nltk_tag.startswith('N'):
        return wordnet.NOUN
    elif nltk_tag.startswith('R'):
        return wordnet.ADV
    else:          
        return None

def lemmatize_sentence(sentence):
    #tokenize the sentence and find the POS tag for each token
    nltk_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))  
    #tuple of (token, wordnet_tag)
    wordnet_tagged = map(lambda x: (x[0], nltk_tag_to_wordnet_tag(x[1])), nltk_tagged)
    lemmatized_sentence = []
    for word, tag in wordnet_tagged:
        if tag is None:
            #if there is no available tag, append the token as is
            lemmatized_sentence.append(word)
        else:        
            #else use the tag to lemmatize the token
            lemmatized_sentence.append(lemmatizer.lemmatize(word, tag))
    return " ".join(lemmatized_sentence)

def rem_stopwords(text):
    word_tokens = nltk.word_tokenize(text)
    filtered_sentence = [w for w in word_tokens if not w in stop_words] 
    return filtered_sentence

def read_input(input_file):
    with open(input_file, 'rb') as f:
        for i, line in enumerate(f):
            if len(line.split(" ".encode("utf-8"))) < 4:
                continue
            # do some pre-processing and return list of words for each review
            # text
            updated_line = ""
            for word in line.decode("utf-8").split(" "):
                updated_line += lemmatize_sentence(word.lower()) + " "
            
            line = rem_stopwords(updated_line)
            
            yield line.lower().split(" ")
#             yield gensim.utils.simple_preprocess(line)


documents = list(read_input("Data.txt"))
print(documents[:1000])
model = gensim.models.Word2Vec (documents, size=300, window=20, min_count=1, workers=10, sg=1)
model.train(documents,total_examples=len(documents),epochs=5)
model.save('custom_word2vec.model')


2020-03-12 15:53:16,766 : INFO : collecting all words and their counts
2020-03-12 15:53:16,768 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-03-12 15:53:16,821 : INFO : PROGRESS: at sentence #10000, processed 218628 words, keeping 10014 word types


[['karachi', ':', 'the', 'sindh', 'government', 'have', 'decide', 'to', 'bring', 'down', 'public', 'transport', 'fare', 'by', '7', 'per', 'cent', 'due', 'to', 'massive', 'reduction', 'in', 'petroleum', 'product', 'price', 'by', 'the', 'federal', 'government', ',', 'geo', 'news', 'report', '.', ''], ['source', 'say', 'reduction', 'in', 'fare', 'will', 'be', 'applicable', 'on', 'public', 'transport', ',', 'rickshaw', ',', 'taxi', 'and', 'other', 'mean', 'of', 'travel', '.', ''], ['meanwhile', ',', 'karachi', 'transport', 'ittehad', '(', 'kti', ')', 'have', 'refuse', 'to', 'abide', 'by', 'the', 'government', 'decision', '.', ''], ['kti', 'president', 'irshad', 'bukhari', 'say', 'the', 'commuter', 'be', 'charge', 'the', 'low', 'fare', 'in', 'karachi', 'as', 'compare', 'to', 'other', 'part', 'of', 'the', 'country', ',', 'add', 'that', '80pc', 'vehicle', 'run', 'on', 'compress', 'natural', 'gas', '(', 'cng', ')', '.', ''], ['', 'bukhari', 'say', 'karachi', 'transporter', 'will', 'cut', 'fare

2020-03-12 15:53:16,899 : INFO : PROGRESS: at sentence #20000, processed 466153 words, keeping 17110 word types
2020-03-12 15:53:17,032 : INFO : PROGRESS: at sentence #30000, processed 749143 words, keeping 23756 word types
2020-03-12 15:53:17,140 : INFO : PROGRESS: at sentence #40000, processed 997429 words, keeping 29012 word types
2020-03-12 15:53:17,147 : INFO : collected 29760 word types from a corpus of 1014630 raw words and 40990 sentences
2020-03-12 15:53:17,149 : INFO : Loading a fresh vocabulary
2020-03-12 15:53:17,447 : INFO : effective_min_count=1 retains 29760 unique words (100% of original 29760, drops 0)
2020-03-12 15:53:17,448 : INFO : effective_min_count=1 leaves 1014630 word corpus (100% of original 1014630, drops 0)
2020-03-12 15:53:17,543 : INFO : deleting the raw counts dictionary of 29760 items
2020-03-12 15:53:17,545 : INFO : sample=0.001 downsamples 37 most-common words
2020-03-12 15:53:17,546 : INFO : downsampling leaves estimated 739073 word corpus (72.8% of p

2020-03-12 15:54:02,615 : INFO : EPOCH 4 - PROGRESS: at 59.49% examples, 66463 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:03,728 : INFO : EPOCH 4 - PROGRESS: at 68.86% examples, 67313 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:04,786 : INFO : EPOCH 4 - PROGRESS: at 77.69% examples, 67521 words/s, in_qsize 18, out_qsize 1
2020-03-12 15:54:05,809 : INFO : EPOCH 4 - PROGRESS: at 89.10% examples, 69390 words/s, in_qsize 10, out_qsize 0
2020-03-12 15:54:05,964 : INFO : worker thread finished; awaiting finish of 9 more threads
2020-03-12 15:54:06,065 : INFO : worker thread finished; awaiting finish of 8 more threads
2020-03-12 15:54:06,188 : INFO : worker thread finished; awaiting finish of 7 more threads
2020-03-12 15:54:06,216 : INFO : worker thread finished; awaiting finish of 6 more threads
2020-03-12 15:54:06,263 : INFO : worker thread finished; awaiting finish of 5 more threads
2020-03-12 15:54:06,292 : INFO : worker thread finished; awaiting finish of 4 more threads
20

2020-03-12 15:54:39,847 : INFO : EPOCH 3 - PROGRESS: at 5.93% examples, 32343 words/s, in_qsize 16, out_qsize 3
2020-03-12 15:54:40,864 : INFO : EPOCH 3 - PROGRESS: at 21.15% examples, 64381 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:42,248 : INFO : EPOCH 3 - PROGRESS: at 35.17% examples, 65944 words/s, in_qsize 19, out_qsize 1
2020-03-12 15:54:43,274 : INFO : EPOCH 3 - PROGRESS: at 44.77% examples, 66947 words/s, in_qsize 20, out_qsize 0
2020-03-12 15:54:44,368 : INFO : EPOCH 3 - PROGRESS: at 55.14% examples, 69720 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:45,497 : INFO : EPOCH 3 - PROGRESS: at 64.57% examples, 69903 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:46,568 : INFO : EPOCH 3 - PROGRESS: at 75.87% examples, 72492 words/s, in_qsize 19, out_qsize 0
2020-03-12 15:54:47,580 : INFO : EPOCH 3 - PROGRESS: at 85.83% examples, 73249 words/s, in_qsize 12, out_qsize 1
2020-03-12 15:54:48,041 : INFO : worker thread finished; awaiting finish of 9 more threads
2020-0

In [20]:

model = gensim.models.KeyedVectors.load('custom_word2vec.model')
print(model.wv.most_similar(positive=['growth']))

2020-03-12 15:19:37,622 : INFO : loading Word2VecKeyedVectors object from custom_word2vec.model
2020-03-12 15:19:42,551 : INFO : loading trainables recursively from custom_word2vec.model.trainables.* with mmap=None
2020-03-12 15:19:42,554 : INFO : loading vocabulary recursively from custom_word2vec.model.vocabulary.* with mmap=None
2020-03-12 15:19:42,555 : INFO : loading wv recursively from custom_word2vec.model.wv.* with mmap=None
2020-03-12 15:19:42,556 : INFO : setting ignored attribute vectors_norm to None
2020-03-12 15:19:42,556 : INFO : setting ignored attribute cum_table to None
2020-03-12 15:19:42,557 : INFO : loaded custom_word2vec.model
2020-03-12 15:19:42,595 : INFO : precomputing L2-norms of word weight vectors


[('gdp', 0.6275264024734497), ('uptake', 0.6234458684921265), ('sluggish', 0.6170008182525635), ('acceleration', 0.615445613861084), ('low-income', 0.6123747229576111), ('marginal', 0.6111158132553101), ('caput', 0.6093372106552124), ('slowdown', 0.6017050743103027), ('fy18', 0.6014953851699829), ('stagnant', 0.5989495515823364)]


In [92]:
'''
@author: abdulsmapara

Trains a support vector machine using the custom word2vec model and 
verbs, adverbs, adjectives features of the sentence


'''
# text8_model.model
import gensim
import pandas as pd
import spacy
import numpy as np
from spacy.symbols import *
from sklearn.model_selection import train_test_split
import pickle
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score


model = gensim.models.KeyedVectors.load('custom_word2vec.model')
nlp = spacy.load('en_core_web_sm')
def get_features(text):
    verbs = []
    associated_with_verbs = []
    doc = nlp(text)
    
    for token in doc:
        if token.pos == VERB:
            verbs.append(token.lemma_)
            for associated in token.children:
                if associated.dep == acomp or associated.dep == advmod:
                    associated_with_verbs.append(associated.text)
                    for further in associated.children:
                        if further.dep == conj:
                            associated_with_verbs.append(further.text)
    
#     displacy.serve(doc, style="dep")
#     print(verbs,associated_with_verbs)
    return verbs + associated_with_verbs

if __name__ == '__main__':
    # 315 411 1372
    # 315 458 1403
    
    data = pd.read_csv("labelled_news.csv")
    trainData = []
    trainLabels = []
    
    count_neu = 0
    for ind in data.index:
        sentence = data['SENTENCE'][ind]
        sentence = sentence.replace('""','')
        label = data['LABEL'][ind]
        features = get_features(sentence)
        if "hardly" in sentence.lower():
            features.append("hardly")
        if "not" in sentence.lower():
            features.append("not")
        if len(features) == 0:
            continue
        error = False
        vector = np.zeros(300)
        
        for word in features:
            try:
                vector = np.add(vector, np.asarray(model[word.lower()]))
#                 print(model[word.lower()], end=' ')
#                 print(word)
            except Exception as e:
                pass

        print(features)
        
        if label != 'NEU':
            trainLabels.append(label)
            trainData.append(vector)
        else:
            random_val = np.random.rand(1,1)
            if random_val >= 0.99:
                count_neu += 1
                trainLabels.append(label)
                trainData.append(vector)
    X_train, X_test, y_train, y_test = train_test_split(trainData, trainLabels, test_size=0.15, random_state=42)
    train_vectors = X_train
    classifier_linear = svm.SVC(kernel='linear')
    classifier_linear.fit(train_vectors, y_train)
    
#     # pickling the model
    pickle.dump(classifier_linear, open('classifier_v2.sav', 'wb'))
    
    test_vectors = X_test
    
    prediction_linear = classifier_linear.predict(test_vectors)

    report = classification_report(y_test, prediction_linear, output_dict=True)

    print('positive: ', report['POS'])
    print('negative: ', report['NEG'])
    print('neutral: ', count_neu)
    print(accuracy_score(y_test, prediction_linear))

2020-03-12 16:21:51,763 : INFO : loading Word2VecKeyedVectors object from custom_word2vec.model
2020-03-12 16:21:52,394 : INFO : loading trainables recursively from custom_word2vec.model.trainables.* with mmap=None
2020-03-12 16:21:52,396 : INFO : loading vocabulary recursively from custom_word2vec.model.vocabulary.* with mmap=None
2020-03-12 16:21:52,398 : INFO : loading wv recursively from custom_word2vec.model.wv.* with mmap=None
2020-03-12 16:21:52,399 : INFO : setting ignored attribute vectors_norm to None
2020-03-12 16:21:52,404 : INFO : setting ignored attribute cum_table to None
2020-03-12 16:21:52,405 : INFO : loaded custom_word2vec.model


['pass']
['want', 'save', 'stop', 'support']
['decide', 'remain', 'leave']
['reach', 'successfully']
['scavenge']
['have', 'show', 'predict', 'could', 'occur', 'could', 'stop', 'disrupt', 'soon']
['be']
['want', 'send', 'come', 'back']
['honor', 'kill']
['hold']
['could', 'be', 'throw', 'be']
['be', 'estimate']
['get', 'just']
['fail', 'revive']
['murder', 'be', 'achieve', 'love', 'give', 'closer']
['hold']
['report']
['trap', 'find']
['advise', 'avoid']
['tie']
['hack', 'claim']
['say', 'be', 'stir', 'ready']
['be', 'poison', 'fumigate', 'spray', 'when']
['accuse', 'plot']
['impeach', 'gain']
['leak', 'show', 'ignore', 'routinely']
['do', 'use', 'value']
['have', 'spark', 'tweet', 'show', 'be', 'cause']
['be', 'believe', 'be', 'disappear', 'say', 'be', 'abduct', 'have', 'be', 'accuse', 'detain', 'forcibly', 'illegally']
['recall']
['hit', 'malvertise', 'drop']
['call']
['be', 'fall']
['reject']
['hack']
['find', 'sell']
['stop', 'get', 'cook', 'say', 'have', 'make', 'laugh', 'accord',

['sentence', 'report', 'disbelieve', 'not']
['confirm', 'appeal']
['launch']
['say', 'would', 'uphold', 'suffer', 'inflict']
['appear', 'back']
['use', 'decry']
['refuse', 'sign']
['rout']
['can', 'block', 'suspect', 'now']
['kill']
['force', 'seize']
['cover', 'flee', 'fear']
['expect', 'win']
['emanate', 'be', 'say']
['say', 'be', 'complete', 'likely']
['promote']
['fill']
['vow']
['call']
['kill', 'make', 'look']
['rule', 'support']
['hand']
['be', 'use', 'create', 'productive']
['be', 'light', 'gather', 'kill']
['hitch', 'hit', 'arrive', 'early']
['defect']
['have', 'list', 'will', 'qualify', 'drive', 'longer']
['say', 'be', 'dismantle', 'be', 'elect', 'again']
['cause']
['deliver', 'detail', 'have', 'be', 'collect', 'process', 'actively', 'where']
['accuse']
['come']
['Have', 'revoke']
['arrest']
['have', 'forget', 'agree', 'request', 'removal', 'have', 'receive', 'issue', 'allow', 'have', 'unlinke', 'now', 'online']
['fight', 'tell', 'stop', 'call']
['work', 'stop', 'slam', 'make

['win']
['pay', 'be', 'accuse', 'finance']
['walk', 'refuse', 'discuss', 'come', 'should', 'respond', 'climate', 'how']
['have', 'turn', 'be', 'have', 'cater']
['detect']
['use']
['create']
['expose']
['approve', 'allow', 'buy', 'include']
['have']
['can', 'seize', 'rule', 'say', 'be', 'intimidate']
['be', 'illegal']
['win', 'make']
['detain', 'protest']
['admit', 'spy', 'blast']
['be', 'build', 'detonate', 'Prematurely']
['prepare']
['seek', 'identify', 'find', 'fear', 'be', 'know', 'be']
['fight', 'swear', 'argue', 'be', 'not']
['dig', 'turn', 'be', 'fossilise']
['mass']
['consume']
['be', 'take', 'gather', 'can', 'identify', 'become', 'Ahead']
['suggest', 'have', 'vacuum', 'incorrect', 'do', 'collect', 'did']
['say']
['murder', 'live', 'continue', 'live']
['want', 'need', 'agree', 'will', 'build', 'say', 'kick', 'soon']
['have', 'would', 'be', 'baffle', 'would', 'understand', 'have', 'claim', 'not']
['wildfire', 'die', 'defend', 'evacuate', 'home']
['suspend', 'call', 'belong']
['fi

['propose']
['cut', 'warn', 'may', 'dry']
['violate']
['be', 'perform', 'must', 'be', 'junk', 'now']
['allege']
['tell', 'be', 'extradite', 'face']
['urge', 'catch']
['have', 'raid', 'now']
['be', 'set', 'become']
['kill']
['halt', 'behead']
['run']
['break', 'abuse', 'admit']
['wound']
['work', 'raise', 'become', 'swim', 'Instead']
['say', 'be', 'come']
['grant', 'condemn', 'threaten', 'intimidate']
['find']
['discover']
['injure', 'More']
['be', 'may', 'replicate', 'appear', 'be', 'Still', 'real']
['kill']
['londoner', 'hold', 'be', 'seize', 'enter', 'have', 'think', 'belong', 'realize', 'be', 'immediately', 'when', 'soon', 'Syrian']
['reveal', 'cause', 'search', 'wipe', 'how']
['kill']
['return', 'steal', 'leave']
['have', 'sack', 'fail', 'stop', 'drop']
['will', 'penalize', 'report', 'be', 'trade', 'not']
['survive']
['disfigure', 'die']
['drown', 'rape', 'kill']
['jail']
['look', 'remove', 'better']
['destroy']
['seek', 'flee', 'be', 'find', 'fraught', 'difficult']
['confirm', 'ki

['shoot']
['erupt', 'kid', 'just', 'not']
['be', 'face', 'be', 'look', 'wonder', 'be', 'be', 'speak', 'great', 'why', 'even']
['arrest', 'be', 'fill', 'have', 'be', 'drug', 'be', 'head']
['try', 'rescue', 'die', 'be', 'shoot']
['expose', 'Fully']
['expect']
['face']
['require', 'choose', 'filter', 'be', 'violate', 'say', 'unconstitutional']
['map']
['can', 'save', 'be', 'turn', 'attract', 'make']
['have', 'rebound', 'increase', 'say']
['approve', 'unanimously']
['shut', 'follow']
['shoot']
['kill', 'get', 'tangle', 'When', 'remotely']
['admit']
['be']
['erupt', 'reach']
['attack', 'could', 'rage']
['be', 'establish', 'sacrifice', 'save']
['will', 'let', 'spend']
['kill', 'face', 'find']
['escalate']
['shake']
['detain', 'try', 'converge', 'ban', 'place']
['report']
['happen', 'try', 'leave', 'when']
['claim', 'have', 'take', 'be', 'have', 'win', 'be', 'kill', 'be', 'run', 'free', 'away']
['suffer']
['will', 'continue']
['kill']
['face', 'return', 'home']
['target', 'do', 'measure']
['b

['be', 'say', 'Over', 'not']
['issue', 'ban', 'obtain']
['attack', 'die']
['quit']
['enforce', 'approve', 'disapprove', 'attempt', 'be', 'ban', 'include']
['come', 'call', 'hijack']
['charge', 'spread', 'knowingly']
['say', 'return', 'be', 'kill', 'send', 'have', 'be', 'hold', 'assault', 'back']
['have', 'be', 'function', 'serve', 'could', 'happen']
['begin']
['lodge']
['have', 'can', 'be', 'film']
['save', 'How']
['accuse', 'rise']
['fail', 'arrest']
['Could', 'push']
['report', 'kill']
['be', 'grow', 'b"Even']
['have', 'develop', 'give', 'see', 'create', 'yet']
['be', 'address', 'will', 'come', 'may', 'spread', 'then', 'not']
['be', 'over']
['become', 'again']
['kill']
['spoof']
['steal', 'rebuild', 'be', 'analyze', 'connect', 'hold']
['hand', 'be', 'torture']
['re', 'read', 'read', 'be', 'be', 'must', 'read', 'why']
['have', 'have']
['strike', 'EVER']
['protest', 'Probably']
['defend']
['predict', 'would', 'be', 'accuse', 'editorialize', 'call', 'take', 'open', 'edit', 'be', 'entitl

['present', 'approve', 'may', 'take', 'be', 'here']
['ambush', 'injure']
['build', 'reveal', 'be', 'relieve', 'Muslim', 'not']
['pursue']
['urge', 'give', 'fail', 'face', 'spiral']
['refuse', 'pay', 'offer', 'give', 'instead']
['have', 'veto', 'hold']
['defeat', 'stay', 'leave']
['launch']
['be', 'covertly']
['seize']
['be', 'leave', 'grow']
['turn']
['miss']
['grow']
['destroy']
['want']
['kiss', 'be']
['hand', 'help', 'rebuild']
['hail']
['wipe', 'literally']
['demand']
['admit', 'use']
['have', 'lose', 'want', 'build', 'could', 'hang']
['deny', 'paralyze', 'kill']
['shoot', 'order', 'leave', 'dead']
['bury']
['do', 'fuck', 'Around']
['continue']
['shell', 'again', 'not']
['use', 'photograph']
['vote', 'defend']
['attack']
['be']
['continue', 'offensive']
['stfu', 'listen', 'predict', 'could', 'scream', 'long']
['win', 'support', 'ban']
['call', 'warn', 'flee']
['protect']
['bomb']
['snatch', 'recount']
['be']
['prepare']
['design']
['free']
['kill']
['will', 'collapse', 'will', 'ret

  'recall', 'true', average, warn_for)


In [40]:
import pandas as pd

df = pd.read_csv("Articles.csv")
sentences = []
for ind in df.index:
    news = df['Article'][ind]
    
    for s in news.split("."):
        print(s)
        sentences.append(s)
with open("Data.txt","w") as f:
    for s in sentences:
        f.write(s+".\n")
print("DONE")
    


KARACHI: The Sindh government has decided to bring down public transport fares by 7 per cent due to massive reduction in petroleum product prices by the federal government, Geo News reported
Sources said reduction in fares will be applicable on public transport, rickshaw, taxi and other means of traveling
Meanwhile, Karachi Transport Ittehad (KTI) has refused to abide by the government decision
KTI President Irshad Bukhari said the commuters are charged the lowest fares in Karachi as compare to other parts of the country, adding that 80pc vehicles run on Compressed Natural Gas (CNG)
 Bukhari said Karachi transporters will cut fares when decrease in CNG prices will be made
                        

HONG KONG: Asian markets started 2015 on an upswing in limited trading on Friday, with mainland Chinese stocks surging in Hong Kong on speculation Beijing may ease monetary policy to boost slowing growth
Hong Kong rose 1
07 percent, closing 252
78 points higher at 23857
82
Seoul closed up 0


However, the euro held its ground in foreign exchange markets
 The single currency bought $1
0733 and 128
20 yen against $1
0741 and 128
05 yen on Wall Street
Hong Kong and Shanghai resumed their upward trend after sharp losses Monday that came after China´s stock market regulator tightened rules on trading with borrowed money and increase the supply of shares for short-selling
Confidence was buoyed by Sunday´s cut by the People´s Bank of China in the amount of cash lenders must hold in reserve, the move aimed at helping kick-start the economy, which grew in January-March at its slowest quarterly pace in six years
 The next indicator on the state of China´s economy comes with HSBC´s preliminary index of manufacturing activity on Thursday
US traders welcomed that move
 The Dow jumped 1
17 percent, the S&P 500 rose 0
92 percent and the Nasdaq rallied 1
27 percent
The dollar edged up despite a key Federal Reserve official suggesting a US rate hike could be put back
New York Fed President 

"There is no particular reason to have lost confidence" in China´s economy because of the bursting bubble, he added
Japan, the world´s number-three economy, had some drag from sluggish consumption and wage growth, the IMF said, lowering its forecast by 0
2 point to 0
8 percent
Slowing growth in emerging market and developing economies was also holding back momentum in the global economy
The IMF said the contraction in Brazil, Latin America´s largest economy, would be worse than previously thought; it expects the economy to shrink by 1
5 percent this year
But the outlook for Russia, also in recession, was improved, with a 3
4 percent contraction expected after improvements in commodity prices and confidence
Risks to growth remained tilted to the downside, it said, including spillovers to economic activity from heightened geopolitical tensions in Ukraine, the Middle East and Africa
Given these uncertainties, the IMF was waiting for better growth in 2016, estimated at 3
8 percent, but sho

He blamed the weakness mainly on sluggish external demand for Chinese goods and lower export prices
The figures come a day after the Asian Development Bank (ABD) said it had lowered its growth expectations for Asia because of the sharp growth slowdown in China, a key driver of global trade
"It´s a confirmation of fears that were existing in the market already that China is in fact doing worse than we had been led to believe and there´s a lot of uncertainty about where that economy really is," Emma Lawson, senior currency strategist at National Australia Bank in Sydney, told Bloomberg News
Among Asian stock markets Shanghai sank 0
88 percent, Hong Kong was 1
90 percent lower and Sydney -- where a number of firms with strong China links are listed -- shed 1
70 percent
Seoul, Taipei and Singapore were also each down more than one percent
 

Hong Kong: Asian markets mostly recovered Thursday from the previous day´s sharp losses but Tokyo tumbled as investors returned from a long weekend t

He said effective measures were being adopted for putting tabs of transfer of money through ‘Hundi’
The SBP Governor further said that about 20 to 30 percent imports were taking place without the letter of credit (LC)
 “We are binding such importers to identify their sources of funding,” he added

LONDON: Oil prices slid more than 4 percent to new 11-year lows on Wednesday as the row between Saudi Arabia and Iran made any cooperation between major exporters to cut output even more unlikely
The furore over Saudi Arabia´s execution of a Shi´ite cleric has stripped nearly 8 percent off the price of oil in the last three trading days, killing speculation that OPEC members might agree to production cuts to lift prices
"There are rising stockpiles and the tension between Iran and Saudi Arabia make any deal on production unlikely," said Michael Hewson, chief market analyst at CMC Markets
Evidence of slowing economic growth in China and India has meanwhile fuelled fears that even strong demand

Investors instead favoured safer assets such as US Treasuries, with the 10-year notes yield falling to a two-week low of 1
714 percent overnight
The increased risk aversion led gold to erase all its losses from earlier this week to trade at $1,227
30 per ounce, coming near its one-year high of $1,262
90 touched about two weeks ago
In the currency market, traditional safe-haven currencies such as the yen and the Swiss franc outperformed
The yen firmed to 111
77 to the dollar on Tuesday, edging near its 15-month high of 110
985 hit on Feb
 11
 It last stood at 111
90
The Swiss franc gained broadly, hitting a one-month high on the euro at 1
09165 franc per euro on Tuesday
 It has since weakened to 1
0929 franc per euro
The franc got a lift also as the head of its central bank warned it could not "endlessly" take further steps to ease monetary conditions
The euro in contrast was hit by a key index on German business climate showing sentiment among German manufacturers plunged by its larges

Chinese demand for diesel slowed in recent years as the world's second-biggest economy shifted away from heavy manufacturing to be more consumer-focused
 In the United States and Japan the weakening demand was linked to slower manufacturing and industrial activity and a mild winter in North America
Prior to 2016, "the European gasoil consumer demonstrated stolid resistance, a resolve that cracked in Q1 2016," when demand declined by 75,000 barrels per day compared to a year earlier, it said
The declines were led by France and Germany, which saw diesel consumption drop by 50,000 and 20,000 bpd respectively from a year earlier
The IEA trimmed slightly its forecast for growth in global oil demand this year to 1
2 million bpd, for total annual demand of 95
9 million bpd

strong>SHANGHAI: China and Hong Kong stocks remained firm on Thursday morning, with investors pricing in some upside surprise in China´s first-quarter GDP figures to be </strong><strong>released on Friday, traders said
</s

 Saeed Ahmed, Deputy Governor State Bank of Pakistan (SBP) and other notable guests from the banking industry
ICBC is the world’s largest bank with total assets of more than USD 3
4 trillion
 NBP is Pakistan's one of the largest financial institution with 1,403 branches in Pakistan and a presence in 20 countries spanning the globe

strong>LONDON/DUBAI: OPEC's thorniest dilemma of the past year - at least from a purely oil standpoint - is about to disappear
</strongLess than six months after the lifting of Western sanctions, Iran is close to regaining normal oil export volumes, adding extra barrels to the market in an unexpectedly smooth way and helped by supply disruptions from Canada to Nigeria
But the development will do little to repair dialogue, let alone help clinch a production deal, when OPEC meets next week amid rising political tensions between arch-rivals Iran and oil superpower Saudi Arabia, OPEC sources and delegates say
Earlier this year, Tehran refused to join an initiati

3069, having earlier slipped to $1
3050, a level last seen in September 1985
"A downward trend in the pound seems to have been triggered thanks to the measures taken by the Bank of England and the possibility of the pound returning to parity with the euro," said Sylvain Loganadin at online trading firm FXCM
It was quoted at 85
16 pence against the euro, below its two-year low seen post-Brexit, having earlier slipped as far as 83
89 pence
"While the decline in the pound is welcome from an exporter point of view, the Governor is unlikely to want to see a disorderly decline and as such his tone could well be more measured and a little less dovish with respect to current sterling weakness," said Michael Hewson, an analyst at CMC Markets
"Currency markets do appear to be running the risk of getting ahead of events with respect to current sterling weakness with bearish sentiment overwhelmingly negative," he said
In a bi-annual report, the Bank of England said that "the current outlook for UK

"/><strong> Fed rate hike?</strongThe closely watched jobs figures help fill out a complex economic picture for market observers eager for signs of whether the US Federal Reserve will raise interest rates later this year
Monetary policy makers earlier this year veered off a course of planned, successive rate hikes as some signs emerged that the American economy might not be on sure footing
The Commerce Department said last month that economic activity had grown by a paltry 1
2 percent in the second quarter
But that data was at odds with Friday´s rosier jobs report
 "Employment rose in all sectors of the economy, helping to keep the rate of unemployment at 4
9 percent amid an increase in the number of people entering the labor market looking for work," said Chris Williamson of IHS Markit
"Adding to the good news was an improvement in pay growth
 Average hourly earnings rose 0
3 percent against expectations of a mere 0
2 percent rise," said Williamson, noting that pay growth was still be

"Recent hawkish comments from Fed officials were probably intended to warn markets against being too complacent about the chance of a rate hike, rather than to make markets fully price in a rate hike," Shirota said
U
S
 bond yields fell, with policy-sensitive two-year notes yield US2YT=RR falling to 0
730 percent, its lowest since Aug
 19, down from 0
853 percent marked on Aug
 29
U
S
 interest rate futures price gained to indicate only about 15 percent chance of a rate hike this month and just over 50 percent even by December, compared to above 20 and 60 percent, respectively, before the data were released
Declining U
S
 yields undermined the dollar against other currencies and precious metals
 The dollar, which had slumped 1
38 percent on the yen on Tuesday, shed another 0
5 percent to 101
48 yen
The yen gained additional support from a media report that the Bank of Japan's board is struggling to agree on a common front in its planned policy review
The euro maintained Tuesday's 0
96 

PRETORIA: England lost opener Alex Hales cheaply and were 29 for one at tea after bowling South Africa out for 475 on the second day of the final Test at Centurion Park on Saturday
Captain Alastair Cook (14) and Nick Compton (0) were not out after Hales continued his unconvincing form, falling for 15 when he was caught at point by Dane Piedt off Kagiso Rabada in the fifth over
England were still 446 runs behind after Quinton de Kock finished 129 not out to become the third South African centurion of the innings and moving rapidly on from 85 at lunch
He brought up his maiden Test century from 104 balls in an aggressive innings before running out of partners
A stand of 50 for the eighth wicket with Kyle Abbott and 82 for the ninth with Piedt kept England in the field a lot longer than they had planned
Frustration for the tourists, already 2-0 up in the series, was exacerbated by dropped catches, including two off De Kock
De Kock´s century followed tons on the first day for Hashim Amla an

The visitors had gone to lunch at 455 for five before Neil Wagner added the wickets of Adam Voges (60) and Mitchell Marsh (18) to the two he took on Sunday
Wagner then completed his second five-wicket haul when he had Nevill caught by wicketkeeper BJ Watling for 13, and then Josh Hazlewood caught at first slip by Brendon McCullum for the same score to end Austrlia's innings on 505
Wagner finished the innings with career-best figures of 6-106
Corey Anderson took the other wicket to fall after lunch, with Pattinson caught at point by Trent Boult, in New Zealand's most successful period in the two-match series
Spectators at the venue observed a minute's silence at 1251 (2351 GMT) to commemorate the fifth anniversary of a devastating earthquake in Christchurch that killed 185 people
Players from both sides wore black arm-bands for the day's play and stood while the big screen displayed a message remembering those who died
New Zealand's only success in the first session was when nightwatchm

1 million on personnel, compared to $115
28 million the year before

KOLKATA: Virat Kohli guided India to a six-wicket win against Pakistan in the crucial Super Ten Group Two match of the World Twenty20 here at the Eden Gardens on Saturday night
Chasing a rather easy target, India completed their victory with 13 balls to spare as Kohli remained not out on 55 which he scored off 37 balls with seven fours and a six
He was supported by Yuvraj Singh (24) and later by his captain Mahindra Singh Dhoni (13 not out), who took the winning single
Kohli added 61 with Yuvraj for the fourth wicket and then 35 with Dhoni for an unbeaten fifth wicket partnership
For Pakistan, fast bowler Mohammad Sami claimed two wickets on successive balls and finished at 2-17 while left-arm fast bowler Mohammad Amir took one wicket for 11 runs
Earlier, Pakistan made 124 for five in 18 overs as their main scorers were Shoaib Malik (26) and Umar Akmal (22)
The match was reduced to a 18-overs-a-side after it was delay

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




1 percent at 7,060
46 points in early deals compared with Monday´s close
In the eurozone, Frankfurt´s DAX 30 stocks index won 0
6 percent to 10,577
94 points compared with Friday´s close
 The DAX was shut Monday owing to a public holiday in Germany
The Paris CAC 40 meanwhile opened Tuesday with a gain of 0
7 percent at 4,485
41
British Prime Minister Theresa May announced at the weekend that her government would start the process of leaving the EU within the next six months -- possibly leading to Britain severing ties with the single market

strong>SINGAPORE: Pakistan will lower the sulphur content of diesel imports from January next year, in line with a global shift toward cleaner fuels, said two sources familiar with the matter
</strongAs vehicle numbers in Asia have surged, countries across the region, including China, India and Vietnam, have adopted more stringent sulphur requirements for their fuels in recent years to cap emissions
Pakistan will require diesel fuel with 500 parts


 are very high," the source said
OPEC will meet non-OPEC producing countries in Vienna on Saturday, hoping non-OPEC will commit to cutting 600,000 barrels per day after its own members agreed to cut 1
2 million bpd last week
OPEC sources said nine non-OPEC countries were set to join the meeting: Azerbaijan, Kazakhstan, Oman, Mexico, Russia, Sudan, South Sudan, Bahrain and Malaysia
 Bolivia may also attend the talks, according to an OPEC source
"There's a much greater presence than we expected 


 and I think countries are coming for a reason," Ecuadorian foreign minister Guillaume Long who is representing the OPEC member at the meeting told Reuters
So far only Russia and Oman have pledged cuts, with one OPEC source saying Mexico could also contribute as much as 150,000 bpd
In contrast, Kazakhstan plans to boost output in 2017 as it launches the long-delayed Kashagan project
Russia is expected to shoulder half of the non-OPEC cut, but on Friday sources in Moscow signaled there were sna

Pakistan GasPort Consortium Limited plans to raise 8
6 billion rupees via seven-year sukuk to finance the construction of the country's second LNG import terminal

Prime Minister Nawaz Sharif has proposed establishment of a Pakistan-Kuwait Joint Business Council for promotion of trade relations between the two countries
He was talking to Prime Minister of Kuwait Sheikh Jaber Al Mubarak Al-Hamad at Bayan Palace in Kuwait on Tuesday
The Prime Minister pointed out that Pakistan and Kuwait have been economic and trade partners for long and Pakistan wished to further enhance its relations with Kuwait in all spheres
 The Joint Ministerial Commission provides the best platform to take stock of bilateral cooperation in various economic sectors and setting new benchmarks to carry this cooperation forward
Referring to the level of bilateral trade between the two countries, the prime minister reiterated that this needed to be further enhanced to its fullest potential
 Encouraging greater private 

In [47]:
import gensim.downloader as api
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
info = api.info()  # show info about available models/datasets
print(info)

['be', 'underpin', 'steady', 'not']
SENTENCE:Loan growth at 14% YoY was not steady, underpinned by a ~12%/19% increase in corporate/retail credit.
 PREDICTION:  ['POS']




In [17]:
import pandas as pd
import math

data = pd.read_csv("news_labelled.csv")
sentences = []
labels = []
count_pos = 0
count_neg = 0
count_neu = 0
for ind in data.index:
    if not(math.isnan(data['positivity'][ind])):
        print(data['headline'][ind].lower(),' ',data['positivity'][ind])
        if data['positivity'][ind] >= 4.0 and data['positivity'][ind] <= 5.0:
            label = "NEU"
            count_neu += 1
        elif data['positivity'][ind] <= 4.0:
            label = "NEG"
            count_neg += 1
        else:
            label = "POS"
            count_pos += 1
        labels.append(label)
        sentences.append(data['headline'][ind].lower())
final_data = {'SENTENCE': sentences, 'LABEL': labels}
df = pd.DataFrame(final_data, columns= ['SENTENCE', 'LABEL']) 
df.to_csv('news_labelled_final.csv')
print(count_pos, ' ', count_neg, ' ', count_neu)

yields on cds fell in the latest week   3.0
currency trading: dollar remains in tight ranges amid wait for u.s. jobs data   3.0
stocks fall again; bofa, alcoa slide   3.0
u.s. dollar falls against most currencies; decline is softened as bond rally stalls   4.0
defending yourself against deflation   4.0
dollar declines as players take profits from rally and after fed boosts rates   3.0
tech sector in hiring drive; google, intel add workers as profits snap back; start-ups also fight for talent   7.0
fed's greenspan refuses to accept blame for recession; upturn's pace is 'glacial'   6.0
producer prices decreased 0.2% in november --- drop caused by 1.5% fall for energy and decline of 0.5% in food costs   6.0
consumer credit continued surge in july, fed says --- but increase of $7.11 billion was smaller than rises in previous two months   6.0
consumer spending rises a bit; incomes climb solidly, but stronger economic growth faces headwinds   4.0
small-scale deficit deal increases risk of u.

the 'conundrum' explained   4.0
blue chips slip lower   3.0
ahead of the tape   4.0
election 2012: job gains drive romney to attack obama over gas   6.0
a housing slowdown can put the brakes on a job sector but open other opportunities   3.0
unequal opportunity: losing ground on the employment front --- losing ground: in latest recession, only blacks suffered net employment loss --- firms added whites, asians and hispanics overall, but they deny any bias --- effects of seniority, location   3.0
no relief in sight at pump --- u.s. gasoline prices jumped 6% in february as critical refineries shut down   4.0
how big government hurts the average joe   5.0
review & outlook (editorial): greenspan's recovery   6.0
business and finance   7.0
capital goods makers face more hard times --- machinery outlays don't grow fast enough for industry   6.0
investors push up stocks, bonds on spate of good news   6.0
producer-price report is inconclusive --- index rises 1% for month; excluding food, energy

the jobs report: economic benchmark proves hard to gauge   5.0
investors switch from equity funds to bonds --- shift into fixed income took place at record pace amid flight to safety sparked by market swoon   3.0
business and finance   5.0
tallying the toll on the economy from 9/11   5.0
consumer confidence in economy hits new high   8.0
board of contributors: goose the money supply   4.0
1995 year-end review of markets and finance --- review of the stock market: stocks remain ready to ramble higher, but a renewed raucous rally is unlikely --- by dave kansas staff reporter of the wall street journal   9.0
home price forecast 2002: bouncing back? --- despite economy, home prices again start gaining ground; but `fire sales' in denver   7.0
bond prices surge as report of weak retail sales for may renews concerns about economy   3.0
dow jones industrials hit high for year after upbeat fed keeps rates unchanged   7.0
new-home sales drop   3.0
tokyo stocks advance on expectations of decline 

wall st. cheered by moves to contain crisis   7.0
dow climbs to record on rate, budget hopes; reports of fidelity buying lift tech stocks   5.0
betting on colleges, correctional facilities; rs partners finds big value in small-caps   6.0
4 executives at ubs quit after internal fund probe   8.0
u.s. industrial output rises   7.0
sears' profits increase 14.4%   5.0
trade soars to 10 million in modest loss: early advance is erased   7.0
economy 'turning,' burns says: reserve board policies defended   7.0
blacks at odds over scrutiny of president   5.0
cautious stock market drifts lower in slowest session in eight weeks   3.0
howard county firms scale ba ...   7.0
rate, profit fears send stocks down   5.0
jobless rate fell to 5.6% in june; stock, bond markets surge as prospects of recession diminish   6.0
drab news depresses market: news helped averages down amex declines   4.0
stock prices edge higher; trading heavy   6.0
the washington post wednesday, december 11, 1985 d3     7.0
welfare

mortgage rates climb to 8.15%; real estate notes   5.0
morgan stanley, warburg announce merger talks; pact would create 2nd-biggest investment bank   5.0
stock prices drift irregularly lower: steels, motors are weak   3.0
computer stocks push nasdaq up 21 to record: broader market mixed; dow rises 1 point   7.0
new jobs signal stronger recovery; area payrolls up sharply this year   7.0
bush aide: economy in lull `at best';boskin close to saying u.s. starting recession   4.0
the big reshuffling: health care reform's winners and losers; billions to change hands, to little net effect   5.0
cutting the deficit may not be solution to economy‰ûªs problems   3.0
merger hopes pull rails to new peaks   7.0
fed rejected more curbs, minutes show   8.0
rate cuts aid financing of public debt   6.0
meanwhile, back home ...   4.0
bargain hunting on bleak friday; holiday shopping in a downturn: deals or nothing at all   3.0
august cpi shows sudden inflation surge: august cpi shows inflation surge   3.

In [45]:
# Make Data.txt larger

import pandas as pd

data = pd.read_csv("labelled_news.csv")
sentences = []
with open("Data.txt", "a") as f:
    for ind in data.index:
        sentence = data['SENTENCE'][ind].lower()
        f.write(sentence + "\n")
    f.close()
print("DONE")




DONE


In [123]:
import spacy
import gensim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from sklearn import svm
from sklearn.metrics import classification_report
from spacy.symbols import VERB, acomp, advmod, conj

nlp = spacy.load('en_core_web_sm')
model = gensim.models.KeyedVectors.load('custom_word2vec.model')

def get_features(text):
    verbs = []
    associated_with_verbs = []
    doc = nlp(text)
    
    for token in doc:
        if token.pos == VERB:
            verbs.append(token.lemma_)
            for associated in token.children:
                if associated.dep == acomp or associated.dep == advmod:
                    associated_with_verbs.append(associated.text)
                    for further in associated.children:
                        if further.dep == conj:
                            associated_with_verbs.append(further.text)
    
#     displacy.serve(doc, style="dep")
#     print(verbs,associated_with_verbs)
    return verbs + associated_with_verbs


classifier_in = open("classifier_v2.sav","rb")
classifier = pickle.load(classifier_in)
'''
Some trials:

Query           , Correct, Prediction
CORRECT:
SBI stocks hardly increased in Q1.,NEG
Loan growth at 14% YoY was steady, underpinned by a ~12%/19% increase in corporate/retail credit., POS
The firm boosted the market.,POS
Loan growth at 14% YoY was unsteady, underpinned by a ~12%/19% increase in corporate/retail credit.,NEG
The firm boosted the market.,POS,POS
The firm failed to boost the market.,NEG
SBI stocks did not decrease in Q1.,POS
SBI as a company grew this financial year.,POS
SBI stocks hardly swelled in Q1.,NEG
SBIN is resolving standard accounts worth Rs 191.4bn post RBI’s 7 June circular.,POS,POS

INCORRECT:
SBI stocks swelled in Q1.,NEG,POS
SBI stocks hardly decrease in Q1., POS, NEG
Loan growth at 14% YoY was not steady, underpinned by a ~12%/19% increase in corporate/retail credit.,NEG,POS


10/13
'''

labels = ["NEG","POS","POS","NEG","POS","NEG","POS","POS","NEG","POS","POS","POS","NEG"]
queries = ["SBI stocks hardly increased in Q1.",
           "Loan growth at 14% YoY was steady, underpinned by a ~12%/19% increase in corporate/retail credit.",
           "The firm boosted the market.",
           "Loan growth at 14% YoY was unsteady, underpinned by a ~12%/19% increase in corporate/retail credit.",
           "The firm boosted the market.",
           "The firm failed to boost the market.",
           "SBI stocks did not decrease in Q1.",
           "SBI as a company grew this financial year.",
           "SBI stocks hardly swelled in Q1.",
           "SBIN is resolving standard accounts worth Rs 191.4bn post RBI’s 7 June circular.",
           "SBI stocks swelled in Q1.",
           "SBI stocks hardly decrease in Q1.",
           "Loan growth at 14% YoY was not steady, underpinned by a ~12%/19% increase in corporate/retail credit."
          ]
index = 0
correct = 0
for query in queries:
    # query = """SBIN is resolving standard accounts worth Rs 191.4bn post RBI’s 7 June circular."""
    feature = get_features(query)

    if "hardly" in query.lower():
            feature.append("hardly")
    if "not" in query.lower():
            feature.append("not")
    if "never" in query.lower():
            feature.append("not")

    if len(feature) != 0:
        error = False
        vector = np.zeros(300)
        for word in feature:
            try:
                vector = np.add(vector, np.asarray(model[word.lower()]))
    #           print(model[word.lower()], end=' ')
    #           print(word)
            except:
                error = True
            if error:
                continue
    print(feature)
    prediction = classifier.predict([vector])
    print("SENTENCE:" + query + " ","PREDICTION: ",prediction, " CORRECT: ",labels[index])
    if prediction == labels[index]:
        correct += 1
    index += 1
    
print("\n==================\nACCURACY: ",round(correct/index,2),"\n==================")



2020-03-12 16:39:13,557 : INFO : loading Word2VecKeyedVectors object from custom_word2vec.model
2020-03-12 16:39:14,227 : INFO : loading trainables recursively from custom_word2vec.model.trainables.* with mmap=None
2020-03-12 16:39:14,229 : INFO : loading vocabulary recursively from custom_word2vec.model.vocabulary.* with mmap=None
2020-03-12 16:39:14,231 : INFO : loading wv recursively from custom_word2vec.model.wv.* with mmap=None
2020-03-12 16:39:14,232 : INFO : setting ignored attribute vectors_norm to None
2020-03-12 16:39:14,233 : INFO : setting ignored attribute cum_table to None
2020-03-12 16:39:14,235 : INFO : loaded custom_word2vec.model


['increase', 'hardly', 'hardly']
SENTENCE:SBI stocks hardly increased in Q1.  PREDICTION:  ['NEG']  CORRECT:  NEG
['be', 'underpin', 'steady']
SENTENCE:Loan growth at 14% YoY was steady, underpinned by a ~12%/19% increase in corporate/retail credit.  PREDICTION:  ['POS']  CORRECT:  POS
['boost']
SENTENCE:The firm boosted the market.  PREDICTION:  ['POS']  CORRECT:  POS
['be', 'underpin', 'unsteady', 'underpinned']
SENTENCE:Loan growth at 14% YoY was unsteady, underpinned by a ~12%/19% increase in corporate/retail credit.  PREDICTION:  ['NEG']  CORRECT:  NEG
['boost']
SENTENCE:The firm boosted the market.  PREDICTION:  ['POS']  CORRECT:  POS
['fail', 'boost']
SENTENCE:The firm failed to boost the market.  PREDICTION:  ['NEG']  CORRECT:  NEG
['do', 'decrease', 'not']
SENTENCE:SBI stocks did not decrease in Q1.  PREDICTION:  ['POS']  CORRECT:  POS
['grow']
SENTENCE:SBI as a company grew this financial year.  PREDICTION:  ['POS']  CORRECT:  POS
['swell', 'hardly', 'hardly']
SENTENCE:SBI st

