# CODE

###### Importing necessary libraries.

In [10]:
import os
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from newsapi import NewsApiClient
from datetime import datetime, timedelta
import pickle
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from textblob import TextBlob

#### 1. OBTAINING API KEY AND DOWNLOADING THE STOCK DATA AND NEWS DATA

In [11]:
# Obtaining News API Key
# Prefer the NEWS_API_KEY environment variable so the secret is not stored in
# the notebook; the literal below is only a fallback placeholder.
news_api_key = os.environ.get("NEWS_API_KEY", "***")

In [12]:
# Downloading Historical Stock Data
# Only the latest 30 days are accessible, so the window is derived from the
# run date instead of hard-coded calendar dates that go stale.
LOOKBACK_DAYS = 30
_window_end = datetime.now()
end_date = _window_end.strftime("%Y-%m-%d")
start_date = (_window_end - timedelta(days=LOOKBACK_DAYS)).strftime("%Y-%m-%d")

##### (NOTE: only the latest 30 days of data are accessible, so `start_date` and `end_date` must lie within 30 days of the current date; check them before running the code.)

In [None]:
# Downloading historical stock price data for Apple Inc. (AAPL)
stock_data = yf.download("AAPL", start=start_date, end=end_date)

# Some yfinance releases return two-level (field, ticker) columns even for a
# single ticker; keep only the field level so later cells can select
# "Open", "Close", ... by plain name.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Fail here with a clear message instead of deep inside a later cell.
if stock_data.empty:
    raise ValueError(
        f"No AAPL price data returned for {start_date} to {end_date}; "
        "check the date range and the network connection."
    )

print(stock_data)

In [19]:
# Reset the index to make date a column. The guard keeps the cell idempotent:
# running it a second time would otherwise add a spurious "index" column.
if "Date" not in stock_data.columns:
    stock_data = stock_data.reset_index()

# Parse the "Date" column itself. After reset_index() the index is a plain
# 0..n-1 RangeIndex, and passing that to pd.to_datetime would turn the row
# numbers into 1970-01-01 timestamps rather than the trading dates.
stock_data["Date"] = pd.to_datetime(stock_data["Date"])




In [None]:
def fetch_news(company, api_key, from_date, to_date):
    """Return the articles NewsAPI reports for ``company`` in a date window.

    A new ``NewsApiClient`` is created on every call and queried through
    ``get_everything`` for English-language results sorted by ``publishedAt``.

    Args:
        company: Search query passed as ``q``.
        api_key: Key used to construct the ``NewsApiClient``.
        from_date: Lower bound of the window, passed as ``from_param``.
        to_date: Upper bound of the window, passed as ``to``.

    Returns:
        The value stored under the ``"articles"`` key of the response.
    """
    client = NewsApiClient(api_key=api_key)
    query = {
        "q": company,
        "from_param": from_date,
        "to": to_date,
        "language": "en",
        "sort_by": "publishedAt",
    }
    response = client.get_everything(**query)
    return response["articles"]
# Obtaining News API Key (prefer the NEWS_API_KEY environment variable; the
# literal is only a fallback placeholder)
news_api_key = os.environ.get("NEWS_API_KEY", "**")

# Take the trading dates from the "Date" column when it exists. The index is
# not usable once reset_index() has run: it then holds row positions, and
# converting those with pd.to_datetime yields 1970-01-01 timestamps, so the
# news would be requested for the wrong days and never match on "Date" later.
if "Date" in stock_data.columns:
    trading_dates = pd.to_datetime(stock_data["Date"])
else:
    trading_dates = pd.to_datetime(stock_data.index)  # Date is still the index

# Fetch news headlines for each trading date (one API request per date)
news_articles = []
for date in trading_dates:
    date_str = date.strftime("%Y-%m-%d")
    news = fetch_news("Apple Inc.", news_api_key, date_str, date_str)
    news_articles.append({"Date": date, "News": news})

# Display a per-date summary instead of dumping every article payload
for entry in news_articles:
    print(entry["Date"].strftime("%Y-%m-%d"), len(entry["News"]), "articles")


#### 2. DATA MERGING 

In [None]:
# Combining stock data with news data (left join keeps every trading day)
merged_data = pd.merge(stock_data, pd.DataFrame(news_articles), on="Date", how="left")

# Forward filling missing news data.
# The result is assigned back to the column: fillna(method=..., inplace=True)
# on a column selection is deprecated and may operate on a temporary copy,
# leaving merged_data unchanged.
merged_data["News"] = merged_data["News"].ffill()

#### 3. FEATURE EXTRACTION USING TEXTBLOB

In [None]:
# Feature Engineering - Sentiment Analysis
def analyze_sentiment(news_list):
    """Return the mean TextBlob polarity of the article titles in ``news_list``.

    Args:
        news_list: Iterable of article dicts; only the ``"title"`` entry of
            each article is read.

    Returns:
        The average polarity of all usable titles as a float, or ``0.0``
        (neutral) when there is nothing to score: an empty or missing list,
        or articles whose title is absent, ``None`` or empty. Without this
        guard the mean of an empty list is NaN, which would propagate into
        the model features.
    """
    if not news_list:
        return 0.0

    # Keep only real, non-empty titles; TextBlob cannot be built from None.
    titles = []
    for article in news_list:
        title = article.get("title") if isinstance(article, dict) else None
        if title:
            titles.append(title)
    if not titles:
        return 0.0

    # Perform sentiment analysis using TextBlob
    sentiments = [TextBlob(title).sentiment.polarity for title in titles]
    # Return the average sentiment polarity for all news articles
    return float(np.mean(sentiments))

# Applying sentiment analysis to the News column to create the 'Sentiment' feature
def _row_sentiment(articles):
    """Score one 'News' cell: lists go through analyze_sentiment, anything else is 0."""
    if isinstance(articles, list):
        return analyze_sentiment(articles)
    return 0


merged_data['Sentiment'] = merged_data['News'].apply(_row_sentiment)

# Display the updated data with the new 'Sentiment' feature
preview_columns = ['Date', 'Close', 'News', 'Sentiment']
print(merged_data[preview_columns])

#### 4. SPLITTING DATASET INTO TRAINING AND TEST SETS

In [70]:
# Predictors (price fields, volume and the engineered 'Sentiment') and the 'Close' target
feature_columns = ["Open", "High", "Low", "Volume", "Sentiment"]
features = merged_data[feature_columns]
target = merged_data["Close"]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Standardizing the data: the statistics are learned from the training rows
# only and then reused unchanged for the test rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### 5. MODEL BUILDING (with rbf kernel)

In [71]:
# Model Building - SVR with 'rbf' kernel
# Only the kernel is set explicitly; no other constructor arguments are passed.
# The estimator is only created here; it is fitted in a later cell.
svr_rbf = SVR(kernel='rbf')

#### 6. MODEL TRAINING AND EVALUATION

In [72]:
# Model Training
# Training the SVR model (all features) on the training data
svr_rbf.fit(X_train_scaled, y_train)

# Build an empty feature frame for the next 30 business days.
# date_range(end=...) produces dates leading *up to* the last observation
# (i.e. the past), so the range is started on the first business day after
# the last observed date instead.
last_observed_date = pd.to_datetime(merged_data['Date']).max()
future_dates = pd.date_range(
    start=last_observed_date + pd.offsets.BDay(1),
    periods=30,
    freq='B',
)
future_features = pd.DataFrame(index=future_dates, columns=features.columns)
# Placeholder value; replace with actual sentiment analysis on future news.
# All other columns are left as NaN here.
future_features['Sentiment'] = 0.5

In [None]:

# Handle missing values in future_features.
# Every column of future_features except 'Sentiment' is entirely NaN. A mean
# imputer fitted on that frame discards the all-NaN columns, which would leave
# 'Sentiment' in position 0 instead of the first training feature. Fitting on
# the training features keeps every column and fills it with its training mean.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
future_features_imputed = imputer.transform(future_features.astype(float))

# Using only the first feature
single_feature = future_features_imputed[:, 0].reshape(-1, 1)

# Standardizing with the scaler that was fitted on the training data, so the
# future values are on the same scale the model was trained on. A new scaler
# fitted on the future rows would use different statistics and would also
# overwrite the training `scaler`.
future_features_scaled = scaler.transform(
    pd.DataFrame(
        future_features_imputed,
        index=future_features.index,
        columns=future_features.columns,
    )
)
single_feature_scaled = future_features_scaled[:, :1]

# Training a new SVR model on the first (scaled) training feature only
svr_rbf_single_feature = SVR(kernel='rbf')
svr_rbf_single_feature.fit(X_train_scaled[:, :1], y_train)

# Make predictions for the next 30 days.
# The future first-feature values are imputed training means, so these
# predictions are a naive placeholder until real future inputs are supplied.
future_predictions = svr_rbf_single_feature.predict(single_feature_scaled)

# Evaluating the single-feature model's performance on the test set
y_pred_test = svr_rbf_single_feature.predict(X_test_scaled[:, :1])

mae = mean_absolute_error(y_test, y_pred_test)
mse = mean_squared_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R^2): {r2}")

# Saving the model as a pickle file
# NOTE(review): this persists `svr_rbf` (the model fitted on all features),
# whereas the metrics printed above belong to `svr_rbf_single_feature`.
# Confirm which model is meant to be saved; the fitted `scaler` is not saved.
model_filename = "svr_model.pkl"
with open(model_filename, 'wb') as file:
    pickle.dump(svr_rbf, file)