In [None]:
%tensorflow_version 2.x
import tensorflow as tf
# Stop immediately unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
  raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')

Found GPU at: /device:GPU:0


In [None]:
%tensorflow_version 2.x
import tensorflow as tf
import timeit

# Check for the GPU; on failure print a configuration hint, then raise.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  """Convolve a random (100, 100, 100, 3) batch on '/cpu:0' and return the summed output."""
  with tf.device('/cpu:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv(images))

def gpu():
  """Convolve a random (100, 100, 100, 3) batch on '/device:GPU:0' and return the summed output."""
  with tf.device('/device:GPU:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv(images))
  
# Warm-up: run each op once before timing; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Time ten runs of each op and report the totals.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit(stmt='cpu()', setup="from __main__ import cpu", number=10)
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit(stmt='gpu()', setup="from __main__ import gpu", number=10)
print(gpu_time)
# The ratio is truncated to an integer for display.
print(f'GPU speedup over CPU: {int(cpu_time / gpu_time)}x')

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
3.754314368999985
GPU (s):
0.038343899999972564
GPU speedup over CPU: 97x


In [None]:
!pip install ktrain

Collecting ktrain
  Downloading ktrain-0.28.3.tar.gz (25.3 MB)
[K     |████████████████████████████████| 25.3 MB 81.9 MB/s 
[?25hCollecting scikit-learn==0.23.2
  Downloading scikit_learn-0.23.2-cp37-cp37m-manylinux1_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 66.0 MB/s 
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[K     |████████████████████████████████| 981 kB 60.1 MB/s 
Collecting cchardet
  Downloading cchardet-2.1.7-cp37-cp37m-manylinux2010_x86_64.whl (263 kB)
[K     |████████████████████████████████| 263 kB 63.7 MB/s 
Collecting syntok
  Downloading syntok-1.3.2-py3-none-any.whl (22 kB)
Collecting seqeval==0.0.19
  Downloading seqeval-0.0.19.tar.gz (30 kB)
Collecting transformers<=4.10.3,>=4.0.0
  Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 61.6 MB/s 
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_

In [None]:
import tensorflow as tf
import ktrain
from ktrain import text

In [None]:
import pandas as pd

In [None]:
# Load the training reviews, reading every column as a string.
data_train = pd.read_excel(io='train.xlsx', dtype=str)

In [None]:
# Preview the first five training rows.
data_train.head(n=5)

Unnamed: 0,Reviews,Sentiment
0,"When I first tuned in on this morning news, I ...",neg
1,"Mere thoughts of ""Going Overboard"" (aka ""Babes...",neg
2,Why does this movie fall WELL below standards?...,neg
3,Wow and I thought that any Steven Segal movie ...,neg
4,"The story is seen before, but that does'n matt...",neg


In [None]:
# Load the test reviews as strings and preview the first five rows.
data_test = pd.read_excel(io='test.xlsx', dtype=str)
data_test.head(n=5)

Unnamed: 0,Reviews,Sentiment
0,Who would have thought that a movie about a ma...,pos
1,After realizing what is going on around us ......,pos
2,I grew up watching the original Disney Cindere...,neg
3,David Mamet wrote the screenplay and made his ...,pos
4,"Admittedly, I didn't have high expectations of...",neg


In [None]:
# Preprocess both frames in 'bert' mode; the call yields the train pair,
# the validation pair and the preprocessor object reused further down.
(X_train, y_train), (X_test, y_test), preprocess = text.texts_from_df(
    train_df=data_train,
    val_df=data_test,
    text_column='Reviews',
    label_columns='Sentiment',
    maxlen=400,
    preprocess_mode='bert',
)

['neg', 'pos']
   neg  pos
0  1.0  0.0
1  1.0  0.0
2  1.0  0.0
3  1.0  0.0
4  1.0  0.0
['neg', 'pos']
   neg  pos
0  0.0  1.0
1  0.0  1.0
2  1.0  0.0
3  0.0  1.0
4  1.0  0.0
downloading pretrained BERT model (uncased_L-12_H-768_A-12.zip)...
[██████████████████████████████████████████████████]
extracting pretrained BERT model...
done.

cleanup downloaded zip...
done.

preprocessing train...
language: en


Is Multi-Label? False
preprocessing test...
language: en


In [None]:
# Shape of the first array in the preprocessed training input.
X_train[0].shape

(25000, 400)

In [None]:
# Build the BERT classifier from the preprocessed training data.
model = text.text_classifier(
    name='bert',
    train_data=(X_train, y_train),
    preproc=preprocess,
)

Is Multi-Label? False
maxlen is 400
done.


In [None]:
# Wrap the model with its training and validation data in a ktrain learner.
learner = ktrain.get_learner(
    model=model,
    train_data=(X_train, y_train),
    val_data=(X_test, y_test),
    batch_size=6,
)

In [1]:
# Train for a single epoch with a learning rate of 2e-5.
learner.fit_onecycle(epochs=1, lr=2e-5)

In [None]:
# Display the learner object's repr.
learner

<ktrain.text.learner.BERTTextClassLearner at 0x7f8f855cffd0>

In [None]:
# Pair the trained model with the preprocessor so raw text can be classified.
predictor = ktrain.get_predictor(learner.model, preproc=preprocess)

In [None]:
# Hand-written reviews used to spot-check the predictor.
testing_data = [
    'This movie was horrible! The plot was boring. Acting was okay, though.',
    'The film really sucked. I want my money back.',
    'The plot had too many holes.',
    'What a beautiful romantic comedy. 10/10 would see again!',
]

In [None]:
# Predicted labels for the sample reviews.
predictor.predict(testing_data)

['neg', 'neg', 'neg', 'pos']

In [None]:
# Same predictions, returned as per-class probabilities.
predictor.predict(testing_data, return_proba=True)

array([[0.99505544, 0.00494451],
       [0.9928141 , 0.00718588],
       [0.99256283, 0.00743716],
       [0.0038152 , 0.99618477]], dtype=float32)

In [None]:
# Load the headline data.
dataset = pd.read_csv(filepath_or_buffer='T_GS_Bert.csv')

In [None]:
# Preview the first five headline rows.
dataset.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Time,headline,related
0,0,2021-12-01,20:13:23,Goldman Sachs picks its favorite under-the-rad...,GS
1,1,2021-12-01,16:57:00,Goldman Sachs Group Inc. stock underperforms W...,GS
2,2,2021-12-01,16:05:00,Why BioXcel Therapeutics Stock Is Slumping Today,GS
3,3,2021-12-01,13:40:01,Is the Options Market Predicting a Spike in Go...,GS
4,4,2021-12-01,10:31:00,Dow's nearly 400-point rally highlighted by ga...,GS


In [None]:
# Remove the 'Unnamed: 0' column in place.
dataset.drop(columns='Unnamed: 0', inplace=True)

In [None]:
# Preview the first two rows after the drop.
dataset.head(n=2)

Unnamed: 0,Date,Time,headline,related
0,2021-12-01,20:13:23,Goldman Sachs picks its favorite under-the-rad...,GS
1,2021-12-01,16:57:00,Goldman Sachs Group Inc. stock underperforms W...,GS


In [None]:
# Label every headline with the trained predictor. The headlines are passed as
# one list so the predictor can process them in batches, instead of issuing a
# separate predict() call per row through apply(). The returned list of labels
# is assigned positionally, one label per row.
dataset['Sentiment'] = predictor.predict(texts=dataset['headline'].tolist())

In [None]:
 dataset.head(n=5)

Unnamed: 0,Date,Time,headline,related,Sentiment
0,2021-12-01,20:13:23,Goldman Sachs picks its favorite under-the-rad...,GS,pos
1,2021-12-01,16:57:00,Goldman Sachs Group Inc. stock underperforms W...,GS,pos
2,2021-12-01,16:05:00,Why BioXcel Therapeutics Stock Is Slumping Today,GS,neg
3,2021-12-01,13:40:01,Is the Options Market Predicting a Spike in Go...,GS,neg
4,2021-12-01,10:31:00,Dow's nearly 400-point rally highlighted by ga...,GS,pos


In [None]:
# Remove the 'related' column in place.
dataset.drop(columns="related", inplace=True)

In [None]:
# Preview the first two rows after the drop.
dataset.head(n=2)

Unnamed: 0,Date,Time,headline,Sentiment
0,2021-12-01,20:13:23,Goldman Sachs picks its favorite under-the-rad...,pos
1,2021-12-01,16:57:00,Goldman Sachs Group Inc. stock underperforms W...,pos


In [None]:
# Keep only the date and the predicted label.
df = dataset.loc[:, ['Date', 'Sentiment']]

In [None]:
# Preview the first two rows.
df.head(n=2)

Unnamed: 0,Date,Sentiment
0,2021-12-01,pos
1,2021-12-01,pos


In [None]:
# Count rows per (Date, Sentiment) pair, keeping first-seen group order.
df1 = (
    df.groupby(by=['Date', 'Sentiment'], sort=False)
    .size()
    .reset_index(name='Count')
)

In [None]:
# Preview the per-day label counts.
df1.head(n=5)

Unnamed: 0,Date,Sentiment,Count
0,2021-12-01,pos,8
1,2021-12-01,neg,3
2,2021-12-02,pos,10
3,2021-12-02,neg,4
4,2021-12-03,pos,9


In [None]:
# Order the counts by date.
df1 = df1.sort_values('Date')

In [None]:
# Drop the row labelled 323 in place.
# NOTE(review): 323 is a hard-coded row label and nothing visible in this
# notebook shows which row it refers to; confirm the label exists on a fresh
# run, since drop() raises KeyError for a missing label.
df1.drop(index=323, inplace=True)

In [None]:
# Preview the counts after the drop.
df1.head(n=5)

Unnamed: 0,Date,Sentiment,Count
0,2021-12-01,pos,8
1,2021-12-01,neg,3
2,2021-12-02,pos,10
3,2021-12-02,neg,4
4,2021-12-03,pos,9


In [None]:
# (rows, columns) of the per-day count table.
df1.shape

(54, 3)

In [None]:
# Reshape to one row per date with one column per sentiment label.
df1 = df1.pivot_table(values='Count', index=['Date'], columns='Sentiment')

In [None]:
# Preview the pivoted table.
df1.head(n=5)

Sentiment,neg,pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-12-01,3.0,8.0
2021-12-02,4.0,10.0
2021-12-03,2.0,9.0
2021-12-04,,1.0
2021-12-05,1.0,3.0


In [None]:
# A date with no rows for a label has a missing count; treat it as zero.
df1 = df1.fillna(value=0.0)

In [None]:
# Sentiment index per date: (pos - neg) / (pos + neg).
df1['Sentiment_index'] = df1['pos'].sub(df1['neg']).div(df1['pos'].add(df1['neg']))

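Sentiment index = (pos - neg) / (pos + neg), so the raw value lies in the range -1 to +1.

Values below 0 indicate negative sentiment and values above 0 indicate positive sentiment.
A later cell remaps only the exact extremes: -1 (all headlines negative) becomes -0.5 and +1 (all headlines positive) becomes +0.5.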

In [None]:
# Preview the table with the new Sentiment_index column.
df1.head(n=5)

Sentiment,neg,pos,Sentiment_index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-01,3.0,8.0,0.454545
2021-12-02,4.0,10.0,0.428571
2021-12-03,2.0,9.0,0.636364
2021-12-04,0.0,1.0,1.0
2021-12-05,1.0,3.0,0.5


In [None]:
# Remap the two extreme scores: exactly +1 becomes +0.5 and exactly -1 becomes
# -0.5. The assignment goes through .loc on df1 itself, because the rows
# yielded by iterrows() may be copies and writing to them is not guaranteed to
# change the frame.
df1.loc[df1['Sentiment_index'] == 1.0, 'Sentiment_index'] = 0.5
df1.loc[df1['Sentiment_index'] == -1.0, 'Sentiment_index'] = -0.5

df1.head()

Sentiment,neg,pos,Sentiment_index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-01,3.0,8.0,0.454545
2021-12-02,4.0,10.0,0.428571
2021-12-03,2.0,9.0,0.636364
2021-12-04,0.0,1.0,0.5
2021-12-05,1.0,3.0,0.5


In [None]:
# Move the Date index back into a regular column, in place.
df1.reset_index(inplace=True)

In [None]:
# Preview the table after the index reset.
df1.head(n=5)

Sentiment,Date,neg,pos,Sentiment_index
0,2021-12-01,3.0,8.0,0.454545
1,2021-12-02,4.0,10.0,0.428571
2,2021-12-03,2.0,9.0,0.636364
3,2021-12-04,0.0,1.0,0.5
4,2021-12-05,1.0,3.0,0.5


In [None]:
# Show the column labels of the table.
df1.columns

Index(['Date', 'neg', 'pos', 'Sentiment_index'], dtype='object', name='Sentiment')

In [None]:
# Fix the column order explicitly.
df1 = df1.reindex(columns=['Date', 'neg', 'pos', 'Sentiment_index'])

In [None]:
# Preview the reordered table.
df1.head(n=5)

Sentiment,Date,neg,pos,Sentiment_index
0,2021-12-01,3.0,8.0,0.454545
1,2021-12-02,4.0,10.0,0.428571
2,2021-12-03,2.0,9.0,0.636364
3,2021-12-04,0.0,1.0,0.5
4,2021-12-05,1.0,3.0,0.5


In [None]:
# Use Date as the index, in place.
df1.set_index(keys='Date', inplace=True)

In [None]:
# Preview the date-indexed table.
df1.head(n=5)

Sentiment,neg,pos,Sentiment_index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-01,3.0,8.0,0.454545
2021-12-02,4.0,10.0,0.428571
2021-12-03,2.0,9.0,0.636364
2021-12-04,0.0,1.0,0.5
2021-12-05,1.0,3.0,0.5


In [None]:
# Remove the raw count columns in place, leaving only Sentiment_index.
df1.drop(columns=['neg', 'pos'], inplace=True)

In [None]:
# Preview the final sentiment table.
df1.head(n=5)

Sentiment,Sentiment_index
Date,Unnamed: 1_level_1
2021-12-01,0.454545
2021-12-02,0.428571
2021-12-03,0.636364
2021-12-04,0.5
2021-12-05,0.5


In [None]:
# Load the price and indicator data.
df_TI = pd.read_csv(filepath_or_buffer='TI_GS_T.csv')

In [None]:
# Preview the first five rows of the price and indicator data.
df_TI.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI,%K
0,2021-12-01,383.820007,390.160004,376.209991,376.480011,2476000,67.234044,79.548952
1,2021-12-02,378.0,389.089996,376.029999,387.540009,3343700,67.234044,79.548952
2,2021-12-03,389.880005,389.880005,379.029999,382.730011,2806300,67.234044,79.548952
3,2021-12-06,388.670013,393.600006,383.48999,389.299988,1990200,67.234044,79.548952
4,2021-12-07,393.579987,401.75,393.059998,400.109985,2441000,67.234044,79.548952


In [None]:
# Index by Date, in place, so the frame can be aligned with the sentiment table.
df_TI.set_index(keys='Date', inplace=True)

In [None]:
# Join the sentiment and price tables side by side, aligned on their indexes.
dataMaj = pd.concat([df1, df_TI], axis='columns')

In [None]:
# Preview the combined table.
dataMaj.head(n=5)

Unnamed: 0,Sentiment_index,Open,High,Low,Close,Volume,RSI,%K
2021-12-01,0.454545,383.820007,390.160004,376.209991,376.480011,2476000.0,67.234044,79.548952
2021-12-02,0.428571,378.0,389.089996,376.029999,387.540009,3343700.0,67.234044,79.548952
2021-12-03,0.636364,389.880005,389.880005,379.029999,382.730011,2806300.0,67.234044,79.548952
2021-12-04,0.5,,,,,,,
2021-12-05,0.5,,,,,,,


In [None]:
# Reorder the columns so that Close comes last. The explicit .copy() makes
# dataMaj an independent frame, so the later column assignment and in-place
# dropna on it do not trigger pandas' SettingWithCopyWarning.
dataMaj = dataMaj[['Open','High','Low','Volume','RSI','%K','Sentiment_index','Close']].copy()

In [None]:
# Preview the reordered table.
dataMaj.head(n=5)

Unnamed: 0,Open,High,Low,Volume,RSI,%K,Sentiment_index,Close
2021-12-01,383.820007,390.160004,376.209991,2476000.0,67.234044,79.548952,0.454545,376.480011
2021-12-02,378.0,389.089996,376.029999,3343700.0,67.234044,79.548952,0.428571,387.540009
2021-12-03,389.880005,389.880005,379.029999,2806300.0,67.234044,79.548952,0.636364,382.730011
2021-12-04,,,,,,,0.5,
2021-12-05,,,,,,,0.5,


In [None]:
# Number of rows in the combined table.
dataMaj.shape[0]

31

In [None]:
# Count the missing sentiment scores.
dataMaj['Sentiment_index'].isna().sum()

0

In [None]:
# Replace any missing sentiment score with 0.5.
dataMaj['Sentiment_index'] = dataMaj['Sentiment_index'].fillna(value=0.5)

In [None]:
# Show any rows that still lack a sentiment score.
dataMaj.loc[dataMaj['Sentiment_index'].isna()]

Unnamed: 0,Open,High,Low,Volume,RSI,%K,Sentiment_index,Close


In [None]:
# Re-count the missing sentiment scores after the fill.
dataMaj['Sentiment_index'].isna().sum()

0

In [None]:
# Discard, in place, every row that has no Open value.
dataMaj.dropna(subset=['Open'], inplace=True)

In [None]:
# Preview the table after dropping rows without an Open value.
dataMaj.head(n=5)

Unnamed: 0,Open,High,Low,Volume,RSI,%K,Sentiment_index,Close
2021-12-01,383.820007,390.160004,376.209991,2476000.0,67.234044,79.548952,0.454545,376.480011
2021-12-02,378.0,389.089996,376.029999,3343700.0,67.234044,79.548952,0.428571,387.540009
2021-12-03,389.880005,389.880005,379.029999,2806300.0,67.234044,79.548952,0.636364,382.730011
2021-12-06,388.670013,393.600006,383.48999,1990200.0,67.234044,79.548952,0.73913,389.299988
2021-12-07,393.579987,401.75,393.059998,2441000.0,67.234044,79.548952,0.6,400.109985


In [None]:
# Number of rows left after the dropna.
dataMaj.shape[0]

21

In [None]:
# import matplotlib.pyplot as plt

In [None]:
# ax = plt.gca()
# dataMaj.plot(kind='line',x='Date',y='Open',ax=ax)
# # x.plot(kind='line',x='Date',y='Close',color='red',ax=ax)

In [None]:
# Write the final table, including its index, to CSV.
dataMaj.to_csv(path_or_buf='TEST_GS.csv')