<a href="https://colab.research.google.com/github/itsmuditt/Stock_Price_Prediction/blob/main/News_for_stock_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **WEB SCRAPING**

---
We will scrape headlines from websites using the piece of code in this segment.


---




In [None]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

In [None]:
import datetime
start_date = datetime.datetime(2013, 1, 1)

date_list = [start_date.date() + datetime.timedelta(days=x) for x in range(44976-41275)]
dates = [x.strftime("%d-%m-%Y") for x in date_list]


### **Economic Times Website's Archieves**

---


```
# Doing it for Economic Times website's ARCHIEVES
```



In [None]:
news = []
for starttime in range(41275, 44976):
  url=f'https://economictimes.indiatimes.com/archivelist/starttime-{starttime}.cms'
  response = requests.get(url)

  soup = BeautifulSoup(response.text, 'html.parser')
  headlines = soup.body.find_all('td', class_="contentbox5")
  day = []
  for x in headlines:
    y = x.find_all('a')
    for heads in y:
      temp = heads.text.strip()
      if len(temp.split())>4:
        day.append(temp)

  news.append(day)

In [None]:
news_et_df = pd.DataFrame(list(zip(dates, news )))

In [None]:
news_et_df.to_csv('news_et.csv')


### **Times of India Website's Archieves**

---


```
# Doing it for Times of India website's ARCHIEVES
```



In [None]:
news_toi = []
for starttime in range(41275, 44976):
  url=f'https://timesofindia.indiatimes.com/archivelist/starttime-{starttime}.cms'
  response = requests.get(url)

  soup = BeautifulSoup(response.text, 'html.parser')
  headlines = soup.body.find_all('span')
  day = []
  for x in headlines:
    y = x.find_all('a')
    for heads in y:
      temp = heads.text.strip()
      if len(temp.split())>4:
        day.append(temp)

  news_toi.append(day)

In [None]:
news_toi_df = pd.DataFrame(list(zip(dates, news_toi)))

In [None]:
news_toi_df.to_csv('news_toi.csv')

## **Revising our DATASET**

---

```
The dataset we were left with, after web scraping, had all the headlines into one column.

The below segment of code, changes it to having as many columns as the particular row requires.

```



---



In [None]:
import pandas as pd

In [None]:
net = pd.read_csv('news_et.csv')

In [None]:
ntoi = pd.read_csv('news_toi.csv')

In [None]:
def split_sentences(row):
    max_sentences = len(row['Headlines'])
    cnt = 1
    i = 2
    while i < max_sentences:
        stri = ['\', "', '", \'', '\', \'', '", "']
        tmp = [row['Headlines'].find(stri[0], i), row['Headlines'].find(stri[1], i), row['Headlines'].find(stri[2], i), row['Headlines'].find(stri[3], i)]
        ans = 1e10
        for t in tmp:
            if t == -1:
                t = 1e10
            ans = min(ans, t)
        if ans == 1e10:
            ans = max_sentences - 2
        j = ans

        hea = row['Headlines'][i:j]
        i = j + 4
        if hea == '[' or hea == ']':
            continue
        row[f'{cnt}'] = hea
        cnt = cnt + 1
        print(hea)
    return row



---

### **Applying for NET Dataset**


---



In [None]:
# Apply the function to split sentences
net = net.apply(split_sentences, axis=1)

In [None]:
# Drop the original 'Headlines' column
net.drop(columns=['Headlines'], inplace=True)

In [None]:
# Rename the 'old_name' column to 'new_name'
net = net.rename(columns={'Date': '0'})

In [None]:
# Convert column names to integers and sort them numerically
net.columns = net.columns.astype(int)
net = net.reindex(sorted(net.columns), axis=1)

In [None]:
net.to_csv('et_rev.csv')



---

### **Applying for NTOI Dataset**


---



In [None]:
# Apply the function to split sentences
ntoi = ntoi.apply(split_sentences, axis=1)

In [None]:
# Drop the original 'Headlines' column
ntoi.drop(columns=['Headlines'], inplace=True)

In [None]:
# Rename the 'old_name' column to 'new_name'
ntoi = ntoi.rename(columns={'Date': '0'})

In [None]:
# Convert column names to integers and sort them numerically
ntoi.columns = ntoi.columns.astype(int)
ntoi = ntoi.reindex(sorted(ntoi.columns), axis=1)

In [None]:
ntoi.to_csv('toi_rev.csv')

---
## Discarding irrelevant News Headlines
---

```
A NLP model has been created and uploaded to Hugging Face Hub.
Now we have to use that model and discard some of the news headlines based on their output categories.

This way, we will only be using the news that's relevant and affects the price of our Stock.

In broad terms, these categories that we have to save are:

> FINANCE
> BUSINESS
> POLITICS
> WORLD NEWS
> SCIENCE & TECHNOLOGY
> LIFESTYLE
> HEALTH & MEDICINE

Others, we will discard.

```


In [None]:
# Installing Transformers with sentencepiece
!pip install --no-cache-dir transformers sentencepiece

Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m309.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m79.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting


```
'HEALTH & MEDICINE': 0
'ENTERTAINMENT': 1
'FINANCE': 2
'SCIENCE & TECHNOLOGY': 3
'WORLD NEWS': 4
'EDUCATION': 5
'SOCIETY': 6
'POLITICS': 7
'ENVIRONMENT': 8
'BUSINESS': 9
'MEDIA': 10
'FOOD': 11
'CRIME': 12
'LIFESTYLE': 13
'SPORTS': 14
```

In [None]:
from transformers import TFDistilBertForSequenceClassification
from sklearn.metrics import f1_score
from tqdm import tqdm
import pickle
from transformers import BertTokenizer, BertForSequenceClassification
from tabulate import tabulate
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
from tqdm import trange

In [None]:
!huggingface-cli login

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

classifier = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")

Downloading (…)lve/main/config.json:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/268M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at Yueh-Huan/news-category-classification-distilbert.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


Downloading (…)okenizer_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [None]:
output = classifier("Adani's stock rises by 10 dollars in a month.")

In [None]:
print(output[0]['label'])

BUSINESS


In [None]:
import pandas as pd

In [None]:
net_rev = pd.read_csv('et_rev.csv')

In [None]:
ntoi_rev = pd.read_csv('toi_rev.csv')

In [None]:
list_save = ['MONEY', 'BUSINESS', 'POLITICS', 'WORLDPOST', 'U.S. NEWS', 'SCIENCE', 'TECH', 'MEDIA', 'THE WORLDPOST', 'WORLD NEWS', 'IMPACT']

###For ET Dataset
---

In [None]:
save_tet = []
for i in range(len(net_rev)):
  dayz_save = []
  for col in range(1, len(net_rev.columns)-1):
    head = net_rev[f'{col}'][i]
    if(pd.isnull(head)):
      break
    op = classifier(head)[0]['label']
    print(op)
    if op in list_save:
      dayz_save.append([op])
  save_tet.append(dayz_save)


In [None]:
saved_et = pd.DataFrame(save_tet)
saved_et

In [None]:
saved_et.to_csv('saved_et.csv')

###For TOI Dataset
---

In [None]:
save_ttoi = []
for i in range(len(ntoi_rev)):
  dayz_save = []
  for col in range(1, len(ntoi_rev.columns)-1):
    head = ntoi_rev[f'{col}'][i]
    if(pd.isnull(head)):
      break
    op = classifier(head)[0]['label']
    print(op)
    if op in list_save:
      dayz_save.append([op])
  save_ttoi.append(dayz_save)

In [None]:
saved_toi = pd.DataFrame(save_ttoi)
saved_toi

In [None]:
saved_toi.to_csv('saved_toi.csv')

```
Now we have two refined datasets, saved_et.csv and saved_toi.csv, which have news headlines that wew can ultimately use with any stock price.

Now, only the labels will change according to the name of stock.

Headlines will be the same!
```

---
##Merging the two datasets
---

In [None]:
import pandas as pd
import numpy as np

In [None]:
saved_et = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/saved_et.csv')

  saved_et = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/saved_et.csv')


In [None]:
saved_toi = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/saved_toi.csv')

  saved_toi = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/saved_toi.csv')


In [None]:
# A = [['Date', 'col1', 'col2', 'col3', 'col4'],
#      [01-01-2013, 'val_a_1', 'val_a_2', np.nan, np.nan],
#      [02-01-2013, 'val_a_3', 'val_a_4', 'val_a_5', 'val_a_6'],
#      [03-01-2013, 'val_a_7', 'val_a_8', 'val_a_9', np.nan]]

# B = [['Date', 'col1', 'col2', 'col3', 'col4', 'col5'],
#      [01-01-2013, 'val_b_1', 'val_b_2', np.nan, np.nan, np.nan],
#      [02-01-2013, 'val_b_3', 'val_b_4', 'val_b_5', 'val_b_6', 'val_b_7'],
#      [03-01-2013, 'val_b_8', 'val_b_9', 'val_b_10', 'val_b_11', np.nan]]


# res = [['Date', 'col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8', 'col9'],
#      [01-01-2013, 'val_a_1', 'val_a_2', 'val_b_1', 'val_b_2', np.nan, np.nan, np.nan, np.nan, np.nan],
#      [02-01-2013, 'val_a_3', 'val_a_4', 'val_a_5', 'val_a_6', 'val_b_3', 'val_b_4', 'val_b_5', 'val_b_6', 'val_b_7'],
#      [03-01-2013, 'val_a_7', 'val_a_8', 'val_a_9', 'val_b_8', 'val_b_9', 'val_b_10', 'val_b_11', np.nan, np.nan]]

In [None]:
import pandas as pd
import numpy as np

# Provided lists A and B
A = [['Date', 'col1', 'col2', 'col3', 'col4'],
     ['01-01-2013', 'val_a_1', 'val_a_2', np.nan, np.nan],
     ['02-01-2013', 'val_a_3', 'val_a_4', 'val_a_5', 'val_a_6'],
     ['03-01-2013', 'val_a_7', 'val_a_8', 'val_a_9', np.nan]]

B = [['Date', 'col1', 'col2', 'col3', 'col4', 'col5'],
     ['01-01-2013', 'val_b_1', 'val_b_2', np.nan, np.nan, np.nan],
     ['02-01-2013', 'val_b_3', 'val_b_4', 'val_b_5', 'val_b_6', 'val_b_7'],
     ['03-01-2013', 'val_b_8', 'val_b_9', 'val_b_10', 'val_b_11', np.nan]]

# Convert lists to DataFrames
df_A = pd.DataFrame(A[1:], columns=A[0])
df_B = pd.DataFrame(B[1:], columns=B[0])


In [None]:
# Merge DataFrames on 'Date'
saved_merged = saved_et.merge(saved_toi, on='Unnamed: 0', how='inner')


In [None]:
# Reorder columns so that NaNs are at the end
df_resultant = saved_merged.apply(lambda row: sorted(row, key=pd.isnull), axis=1)


###Revising this final Dataset

In [None]:
res = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res.csv')

In [None]:
res['0'][0][len(res['0'][0])-2]

'n'

In [None]:
def split_sentences(row):
    max_sentences = len(row['0'])
    cnt = 1
    i = 7
    while i < max_sentences:
        stri = ['\']', '"]']
        tmp = [row['0'].find(stri[0], i), row['0'].find(stri[1], i)]
        ans = 1e10
        for t in tmp:
            if t == -1:
                t = 1e10
            ans = min(ans, t)
        if ans == 1e10:
            break
        j = ans

        head = row['0'][i:j]
        i = j + 8
        if head == '[' or head == ']':
            continue
        row[f'{cnt}'] = head
        cnt = cnt + 1
        print(head)
    return row

In [None]:
# Apply the function to split sentences
res = res.apply(split_sentences, axis=1)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Tech Layoffs: Microsoft-owned GitHub to shed 10% employees, will close all offices
Oil prices jump more than 2% on Russian plan to cut output
Lupin Q3 Results: Profit slumps 72% to Rs 153 cr
Layoffs strip away tech worker visas along with jobs
Gold tumbles Rs 669; Silver plummets Rs 1,026
UP receives investment proposals of Rs 32.92 lakh crores through GIS roadshows: CM Yogi Adityanath
In India\'s well-being lies world prosperity: PM Modi at UP investors\' summit
2022 smartphone shipments fell 10% on year: IDC
4.34 crore hospital admissions costing Rs 51,749 crore authorised under govt\'s health insurance scheme: Health Minister
PPF, SCSS, Sukanya Samriddhi, other small savings schemes: What happens if an accountholder dies without nominee; govt clarifies
MSCI says four Adani stocks accounted for 0.27% of EM index as of Feb 8
Prime Minister Narendra Modi\'s popularity intact despite Adani controversy, says new poll
No imp

In [None]:
# Drop the original 'Headlines' column
res.drop(columns=['0'], inplace=True)

In [None]:
res.drop(columns=['Unnamed: 0'], inplace=True)

In [None]:
# Convert column names to integers and sort them numerically
res.columns = res.columns.astype(int)
res = res.reindex(sorted(res.columns), axis=1)

In [None]:
res.to_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_rev.csv')

In [None]:
res_rev = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_rev.csv')

  res_rev = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_rev.csv')


In [None]:
res_rev.replace({'â': "'", 'â': "'"}, regex=True, inplace=True)

In [None]:
res_new = res_rev.replace(to_replace=r"\\", value="", regex=True)

In [None]:
# Define a function to drop characters from the string based on the index
def drop_chars(cell, index):
    if index >= 10 and index < 100:
        return cell[1:]
    elif index >= 100 and index < 1000:
        return cell[2:]
    elif index >= 1000:
        return cell[3:]
    else:
        return cell


In [None]:
# Apply the function to each cell of the 'col_1' column
res_new['1'] = [drop_chars(cell, idx) for idx, cell in enumerate(res_new['1'])]

In [None]:
# Access a particular cell
cell_value = res_new.loc[8, '4']


In [None]:
# Find out the data type of the cell
cell_dtype = type(cell_value)

In [None]:
print(cell_dtype)

<class 'str'>


In [None]:
res_new.columns

Index(['Unnamed: 0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '700', '701', '702', '703', '704', '705', '706', '707', '708', '709'],
      dtype='object', length=710)

In [None]:
res_new.drop('Unnamed: 0', axis=1, inplace=True)

In [None]:
res_new.head(3)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,700,701,702,703,704,705,706,707,708,709
0,Direct cash transfer in 20 districts from today,Why Singapore scores over India on settlement ...,Sharp to insulate Indian operations from globa...,Anti-rape laws: BJP leader Sushma Swaraj reviv...,Government to allow power companies to divert ...,New year starts with a number of reasons to wo...,Govt proposes separate power supply lines for ...,B-schools strengthen family business courses t...,Higher investment limits & weaker rupee help F...,Gold set for 12th annual gain on stimulus efforts,...,,,,,,,,,,
1,Essar Engery in talks with lenders for crude i...,Indian student arrested in Singapore over bomb...,Natural gas futures fall on milder weather,Haryana Minister Shiv Charan Sharma stirs a ro...,ET review: Lenovo LePhone K860,Why 2013 will be a crucial year for chief econ...,New year: Seven jobs in finance up for grabs i...,Under the lens: Bankers to watch out for in 2013,Commodity broking firms doubled income from FI...,Sebi seeks approval for foreign funds into alt...,...,,,,,,,,,,
2,Focus shifts to fiscal deficit as US House of ...,Sovereign debt of leading economies to fall in...,US 'fiscal cliff' deal puts higher tax on 77% ...,"Tata group to invest over Rs 45,000 cr, expand...","IT officials seize Rs 28,000 cr worth 'US bond...",Setback for Narendra Modi: SC upholds Justice ...,Delhi rape case should be tried speedily but l...,The Water Policy is at best a beginning and ne...,Ending oil subsidies key to curb a runaway cur...,How India Inc can make their CSR spends count,...,,,,,,,,,,


In [None]:
res_new.index.name = 'Index'

In [None]:
res_new.head(3)

In [None]:
res_new.to_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_new_without_date.csv')

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_new_without_date.csv')

  df = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_new_without_date.csv')


In [None]:
import datetime
start_date = datetime.datetime(2013, 1, 1)

date_list = [start_date.date() + datetime.timedelta(days=x) for x in range(44976-41275)]
dates = [x.strftime("%d-%m-%Y") for x in date_list]

In [None]:
len(dates)

3701

In [None]:
df.head(2)

Unnamed: 0.1,Date,Unnamed: 0,1,2,3,4,5,6,7,8,...,700,701,702,703,704,705,706,707,708,709
0,01-01-2013,0,Direct cash transfer in 20 districts from today,Why Singapore scores over India on settlement ...,Sharp to insulate Indian operations from globa...,Anti-rape laws: BJP leader Sushma Swaraj reviv...,Government to allow power companies to divert ...,New year starts with a number of reasons to wo...,Govt proposes separate power supply lines for ...,B-schools strengthen family business courses t...,...,,,,,,,,,,
1,02-01-2013,1,Essar Engery in talks with lenders for crude i...,Indian student arrested in Singapore over bomb...,Natural gas futures fall on milder weather,Haryana Minister Shiv Charan Sharma stirs a ro...,ET review: Lenovo LePhone K860,Why 2013 will be a crucial year for chief econ...,New year: Seven jobs in finance up for grabs i...,Under the lens: Bankers to watch out for in 2013,...,,,,,,,,,,


In [None]:
# Replace the 'Unnamed: 0' column with the new list
df.insert(loc=0, column='Date', value=dates)

In [None]:
df = df.drop('Unnamed: 0', axis=1)

In [None]:
df.head(1)

Unnamed: 0,Date,1,2,3,4,5,6,7,8,9,...,700,701,702,703,704,705,706,707,708,709
0,01-01-2013,Direct cash transfer in 20 districts from today,Why Singapore scores over India on settlement ...,Sharp to insulate Indian operations from globa...,Anti-rape laws: BJP leader Sushma Swaraj reviv...,Government to allow power companies to divert ...,New year starts with a number of reasons to wo...,Govt proposes separate power supply lines for ...,B-schools strengthen family business courses t...,Higher investment limits & weaker rupee help F...,...,,,,,,,,,,


In [None]:
df.shape

(3701, 710)

In [None]:
df.to_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/res_new_with_date.csv')



```
Dates added!
Datasets merged.
CSV Saved!
```



## Adding Sentiment Score

In [1]:
!pip install transformers



In [2]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis", device=0)

Downloading (…)lve/main/config.json:   0%|          | 0.00/933 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [3]:
import pandas as pd

In [6]:
df = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/news_dataset_for_financial_analysis.csv', index_col=False)

  df = pd.read_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/news_dataset_for_financial_analysis.csv', index_col=False)


In [7]:
df = df.drop('Unnamed: 0', axis=1)

In [10]:
def scale_value(value):
    min_val = 0
    max_val = 1
    new_min = -0.1
    new_max = 0.1
    scaled_value = ((value - min_val) / (max_val - min_val)) * (new_max - new_min) + new_min
    return scaled_value

In [13]:
def give_score(row):
  score = 0.0
  cnt = 0
  for head in row:
    if pd.isna(head):
      break
    cnt = cnt + 1
    op = pipe(head)
    if op[0]['label'] == 'negative':
      score = score - op[0]['score']
    elif op[0]['label'] == 'neutral':
      score = score + scale_value(op[0]['score'])
    else:
      score = score + op[0]['score']
  avg = score/cnt
  print(avg, " ---", cnt)
  return avg

In [15]:
# Apply the function to each row
score_df = df[df.columns.difference(['Date'])].apply(give_score, axis=1)



0.07631101063319619  --- 210
0.06903892306249536  --- 268
0.10368060415848758  --- 266
0.05455414466938727  --- 295
0.01839937402142419  --- 180
0.04579674105404473  --- 179
0.14371277847656838  --- 325
0.005016126138878511  --- 309
0.03585858637491863  --- 375
0.005733386907109465  --- 326
0.05712734037994318  --- 351
0.0484349380056542  --- 166
-0.0038242488914395005  --- 161
0.01618004828963238  --- 329
0.08339104515905597  --- 262
0.07288337009574718  --- 303
0.06550288665976996  --- 297
0.06753952552194473  --- 292
0.04426152774657326  --- 174
0.054570661693118395  --- 172
0.11196253755513358  --- 306
0.07812110253020649  --- 283
0.06961970307610252  --- 275
0.06450219376230092  --- 317
0.04969005515850871  --- 251
0.06146165442793334  --- 146
0.04013729632355787  --- 131
0.09319782142179558  --- 332
0.09157418525143037  --- 314
0.10800377433354032  --- 309
0.09388411883887558  --- 329
0.021271397865424726  --- 295
-0.014458273725900033  --- 171
-0.009606871179714326  --- 157
0.06

In [28]:
score_df = score_df.assign(Date = df['Date'])

In [32]:
score_df.to_csv('/content/drive/MyDrive/Dataset/Stock Price Prediction/News/sentiment_scores.csv')

---
---

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

---
---