# Market Forecasting with News Analysis

## 1. Setting Up the Environment
```python
# Install necessary libraries
!pip install torch torchvision torchaudio
!pip install pandas numpy scikit-learn
!pip install requests beautifulsoup4
!pip install nltk textblob
!pip install gdeltdoc
!pip install yfinance
```

In [27]:

# Analysis target: the company name used for the news query and the
# ticker symbol used for the stock-price download.
company, ticker_symbol = 'Nvidia', 'NVDA'

# Analysis window as ISO dates (yyyy-mm-dd).
start_date, end_date = '2023-01-01', '2023-12-31'

# Data Collection
## Gathering News Data

In [24]:
import pandas as pd
import gdeltdoc

# https://pypi.org/project/gdeltdoc/

def fetch_gdelt_data(query, start_date, end_date, num_records=250):
    """Search GDELT for news articles matching a keyword within a date range.

    Args:
        query: Keyword to search for (e.g. a company name).
        start_date: Start of the search window, passed to the filter as-is.
        end_date: End of the search window, passed to the filter as-is.
        num_records: Number of articles to request. Defaults to 250, the
            value this notebook originally hard-coded.

    Returns:
        Whatever ``GdeltDoc.article_search`` returns for the filter.
    """
    # Only the `gdeltdoc` module itself is imported in this notebook, so the
    # client and filter classes must be reached through the module; the bare
    # names `GdeltDoc` / `Filters` would raise NameError.
    client = gdeltdoc.GdeltDoc()
    search_filters = gdeltdoc.Filters(
        keyword=query,
        start_date=start_date,
        end_date=end_date,
        num_records=num_records,
    )
    return client.article_search(search_filters)

# Example usage: fetch the articles that mention the company in the date range
company_news_data = fetch_gdelt_data(company, start_date, end_date)

# Wrap the results in a DataFrame for easier manipulation
company_news_df = pd.DataFrame(company_news_data)

# Keep only English-language articles (case-insensitive match on 'language')
company_news_df = company_news_df.loc[
    company_news_df['language'].str.lower().eq('english')
]

print(company_news_df)

                                                   url  \
0    https://www.windowscentral.com/software-apps/n...   
2    https://www.fool.com/investing/2023/11/07/will...   
3    https://www.investors.com:443/research/ibd-sto...   
7    https://www.investors.com:443/news/technology/...   
8    https://www.tweaktown.com/news/94274/nvidia-fl...   
..                                                 ...   
242  https://www.benzinga.com/markets/equities/23/1...   
244  https://www.bnnbloomberg.ca/nvidia-upgrades-pr...   
246  https://www.newegg.com/gigabyte-geforce-rtx-40...   
248  https://www.techtimes.com/articles/298643/2023...   
249  https://finance.yahoo.com/news/nvidia-stock-hi...   

                                            url_mobile  \
0                                                        
2                                                        
3                                                        
7                                                        
8            

## Gathering Stock Data

In [28]:
import yfinance as yf

# Fetch the price history for the ticker over the analysis window
stock_data = yf.download(
    tickers=ticker_symbol,
    start=start_date,
    end=end_date,
)

# Show the first five rows as a quick sanity check
print(stock_data.head(5))

[*********************100%***********************]  1 of 1 completed

              Open    High     Low   Close  Adj Close     Volume
Date                                                            
2023-01-03  14.851  14.996  14.096  14.315  14.305580  401277000
2023-01-04  14.567  14.853  14.241  14.749  14.739296  431324000
2023-01-05  14.491  14.564  14.148  14.265  14.255614  389168000
2023-01-06  14.474  15.010  14.034  14.859  14.849222  405044000
2023-01-09  15.284  16.056  15.141  15.628  15.617718  504231000





# Sentiment Analysis
Analyze the sentiment of the company's news coverage based on the title of each news article.

In [None]:
# In company_news_df, the "seendate" column contains the date the article was published, in the form yyyymmddThhmmssZ (e.g. 20231101T013000Z).
# In stock_data, the "Date" index contains the trading date, in the form yyyy-mm-dd (e.g. 2023-11-01).

# Model Building

In [29]:
import torch
import torch.nn as nn
import torch.optim as optim

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

class StockPredictionModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the model on a batch-first input tensor.

        Args:
            x: Tensor indexed as (batch, sequence, feature), as required by
                the ``batch_first=True`` LSTM.

        Returns:
            Tensor of shape (batch, output_size) computed from the LSTM
            output at the final time step.
        """
        # Take the state dimensions from this instance's own LSTM rather than
        # from module-level globals, so the model works for any configuration
        # it was constructed with and regardless of cell execution order.
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        # new_zeros allocates on x's device with x's dtype, so the initial
        # states always match the input without consulting a global `device`.
        h_0 = x.new_zeros(state_shape)
        c_0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h_0, c_0))
        # Only the last time step feeds the output layer.
        return self.fc(out[:, -1, :])

# Network dimensions
input_size, output_size = 1, 1
hidden_size, num_layers = 50, 2

# Training settings
num_epochs, learning_rate = 100, 0.001

# Build the model on the selected device, then the loss and the optimizer
model = StockPredictionModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

cpu
