In [12]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
from textblob import TextBlob

# Download the NLTK data packages, one nltk.download call per package name.
for resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(resource)

# --- Fetch News Articles ---
def fetch_tesla_news(api_key, start_date, end_date):
    """Fetch English-language articles matching 'Tesla' from NewsAPI.

    Args:
        api_key: Key sent to the service as the ``apiKey`` query parameter.
        start_date: Start of the search window; formatted with ``strftime``
            as ``YYYY-MM-DD`` for the ``from`` parameter.
        end_date: End of the search window; formatted the same way for the
            ``to`` parameter.

    Returns:
        The list stored under ``articles`` in the JSON response, or an empty
        list when the request cannot be completed or the HTTP status is not
        200. Failures are reported on stdout rather than raised.
    """
    url = "https://newsapi.org/v2/everything"
    params = {
        'q': 'Tesla',
        'from': start_date.strftime('%Y-%m-%d'),
        'to': end_date.strftime('%Y-%m-%d'),
        'sortBy': 'publishedAt',
        'apiKey': api_key,
        'language': 'en',
    }
    try:
        # Without a timeout a stalled connection would block the script
        # indefinitely.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Connection/timeout problems follow the same "report and return
        # nothing" contract as a bad HTTP status.
        print(f"Error fetching news: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching news: {response.status_code}")
        return []

    data = response.json()
    return data.get('articles', [])

# --- Sentiment Analysis ---
def analyze_sentiment(text):
    """Label *text* by the sign of its TextBlob polarity.

    Returns:
        A ``(label, polarity)`` tuple. The label is 'Positive' when the
        polarity is above zero, 'Negative' when it is below zero, and
        'Neutral' otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        label = 'Positive'
    elif polarity < 0:
        label = 'Negative'
    else:
        label = 'Neutral'
    return label, polarity

# --- News Sentiment for Dec 2020 ---
# Prefer a key supplied through the NEWSAPI_KEY environment variable.
# NOTE(review): the literal fallback keeps the script runnable exactly as
# before, but a key committed to source should be rotated and this default
# removed.
api_key = os.environ.get('NEWSAPI_KEY') or '8fa805af31b54a0ab98c3d5e804f49df'
start_date = datetime(2020, 12, 1)
end_date = datetime(2020, 12, 31)
articles = fetch_tesla_news(api_key, start_date, end_date)

scores = []
for article in articles:
    # dict.get(key, '') still returns None when the key is present with a
    # null value, which would make the concatenation below raise TypeError;
    # `or ''` covers both the missing and the null case.
    title = article.get('title') or ''
    desc = article.get('description') or ''
    combined = title + " " + desc
    _, score = analyze_sentiment(combined)
    scores.append(score)

# With no articles (e.g. a failed request) the sentiment falls back to 0.0.
average_sentiment = np.mean(scores) if scores else 0.0
print("Average Sentiment Score (Dec 2020):", average_sentiment)

# --- Download Tesla Data ---
df = yf.download('TSLA', start='2015-01-01', end='2020-12-31')
if df.empty:
    # Stop here with a clear message instead of carrying an empty frame
    # into the scaling and training steps.
    raise ValueError("No TSLA price data was downloaded.")

# --- Handle MultiIndex Columns ---
# Flatten multi-level labels into single lowercase names joined by '_';
# single-level labels are only stripped and lowercased.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = ['_'.join(col).strip().lower() for col in df.columns.values]
else:
    df.columns = df.columns.str.strip().str.lower()

# --- Get Close Column ---
# Prefer a column whose '_'-separated parts contain exactly 'close'. A plain
# substring test would also match an 'adj close' column that appears earlier,
# while the January 2021 comparison reads the unadjusted 'Close' column.
close_col = next(
    (col for col in df.columns if 'close' in col.split('_')),
    None,
)
if close_col is None:
    # Fall back to the loose substring match for any other naming scheme.
    close_col = next((col for col in df.columns if 'close' in col), None)
if close_col is None:
    raise ValueError("No 'close' column found.")

# --- Scale and Prepare Data ---
# Drop rows without a close price, then scale the close column into [0, 1].
df = df.dropna(subset=[close_col])
data = df[[close_col]].values

scaler_close = MinMaxScaler(feature_range=(0, 1))
scaled_close = scaler_close.fit_transform(data)

# average_sentiment is one scalar, so this second feature column holds the
# same value in every row.
sentiment_feature = np.full((len(scaled_close), 1), average_sentiment)
combined_data = np.hstack((scaled_close, sentiment_feature))

# The first 80% of the rows (rounded up) form the training slice.
train_len = int(np.ceil(len(combined_data) * 0.8))
train_data = combined_data[:train_len]

# Each sample is the 60 rows preceding position `end`; its target is the
# scaled close (column 0) at `end`.
window_ends = range(60, len(train_data))
X_train = np.array([train_data[end - 60:end] for end in window_ends])
y_train = np.array([train_data[end, 0] for end in window_ends])
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# --- Build and Train LSTM ---
# The input is declared with an explicit Input object instead of passing
# `input_shape` to the first layer, which Keras warns against for Sequential
# models. Each sample is 60 time steps of 2 features (scaled close and
# sentiment).
model = Sequential([
    Input(shape=(60, 2)),
    LSTM(50, return_sequences=False),
    Dense(1)  # Single output: the next scaled close price
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# --- Prediction for Jan 2021 ---
df_2021 = yf.download('TSLA', start='2021-01-01', end='2021-01-31')
actual_2021 = df_2021['Close'].values
dates_2021 = df_2021.index

# Seed the rolling window with the final 60 rows of the prepared data.
test_data = combined_data[-60:]
X_test = np.stack([test_data])  # Shape: (1, 60, 2)

# Forecast one step per January row, feeding each prediction back in as the
# newest window row (paired with the constant sentiment value).
predicted_scaled = []
for _step in range(len(actual_2021)):
    pred = model.predict(X_test, verbose=0)
    next_close = pred[0, 0]
    predicted_scaled.append(next_close)

    # Drop the oldest time step and attach the new (close, sentiment) row.
    newest_row = np.array([[[next_close, average_sentiment]]])
    X_test = np.concatenate((X_test[:, 1:, :], newest_row), axis=1)

# --- Inverse Transform ---
# The collected predictions are float32 model outputs. Widen them to float64
# before un-scaling so that round(2) below prints two decimals instead of
# float32 artifacts such as 238.309998.
predicted_scaled = np.array(predicted_scaled, dtype=np.float64).reshape(-1, 1)
predicted_prices = scaler_close.inverse_transform(predicted_scaled).flatten()

# --- Final Comparison ---
# One row per downloaded January 2021 date: actual close next to the forecast.
comparison_df = pd.DataFrame({
    'Date': dates_2021.to_numpy(),
    'Actual Price': actual_2021.flatten(),
    'Predicted Price': predicted_prices
})

print(comparison_df.round(2))



# --- Export to CSV ---
# Write the comparison table without the DataFrame index column.
output_path = "tesla_2021_january_comparison.csv"
comparison_df.to_csv(output_path, index=False)
print("Saved to 'tesla_2021_january_comparison.csv'")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[*********************100%***********************]  1 of 1 completed

Error fetching news: 426
Average Sentiment Score (Dec 2020): 0.0
X_train shape: (1148, 60, 2)
y_train shape: (1148,)
Epoch 1/50



  super().__init__(**kwargs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 5.1095e-04
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 7.3579e-05
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 2.6587e-05
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 2.3033e-05
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 2.2784e-05
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 2.1003e-05
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 2.2090e-05
Epoch 8/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 2.0924e-05
Epoch 9/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 1.8370e-05
Epoch 10/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m

[*********************100%***********************]  1 of 1 completed


         Date  Actual Price  Predicted Price
0  2021-01-04        243.26       238.309998
1  2021-01-05        245.04       245.710007
2  2021-01-06        251.99       253.960007
3  2021-01-07        272.01       262.779999
4  2021-01-08        293.34       272.000000
5  2021-01-11        270.40       281.510010
6  2021-01-12        283.15       291.260010
7  2021-01-13        284.80       301.220001
8  2021-01-14        281.67       311.369995
9  2021-01-15        275.39       321.739990
10 2021-01-19        281.52       332.329987
11 2021-01-20        283.48       343.160004
12 2021-01-21        281.66       354.230011
13 2021-01-22        282.21       365.559998
14 2021-01-25        293.60       377.130005
15 2021-01-26        294.36       388.959991
16 2021-01-27        288.05       401.010010
17 2021-01-28        278.48       413.269989
18 2021-01-29        264.51       425.709991
Saved to 'tesla_2021_january_comparison.csv'
