<a href="https://colab.research.google.com/github/biswajitmohanty/genai/blob/main/Data_Scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import yfinance as yf
import pandas as pd
from bs4 import BeautifulSoup
import requests

# Function to get stock data using yfinance
def get_stock_data(ticker, start_date, end_date):
    """Fetch price history for one ticker over a date range via yfinance.

    Args:
        ticker: Symbol passed to ``yf.Ticker`` (e.g. ``"AAPL"``).
        start_date: Passed through as the ``start`` argument of ``history``.
        end_date: Passed through as the ``end`` argument of ``history``.

    Returns:
        Whatever ``yf.Ticker(ticker).history(...)`` returns for that range.
    """
    return yf.Ticker(ticker).history(start=start_date, end=end_date)

# Example: download Apple (AAPL) history for the 2023 date window
ticker = "AAPL"
start_date, end_date = "2023-01-01", "2023-12-31"

stock_data = get_stock_data(ticker, start_date, end_date)

# Preview the first five rows
print(stock_data.head(5))

# Persist the downloaded history to CSV for later analysis
csv_path = f"{ticker}_stock_data.csv"
stock_data.to_csv(csv_path)

                                 Open        High         Low       Close  \
Date                                                                        
2023-01-03 00:00:00-05:00  128.782649  129.395518  122.742873  123.632530   
2023-01-04 00:00:00-05:00  125.431607  127.181268  123.642412  124.907700   
2023-01-05 00:00:00-05:00  125.668849  126.301493  123.326093  123.583099   
2023-01-06 00:00:00-05:00  124.561732  128.792531  123.454601  128.130234   
2023-01-09 00:00:00-05:00  128.970458  131.876670  128.397123  128.654129   

                              Volume  Dividends  Stock Splits  
Date                                                           
2023-01-03 00:00:00-05:00  112117500        0.0           0.0  
2023-01-04 00:00:00-05:00   89113600        0.0           0.0  
2023-01-05 00:00:00-05:00   80962700        0.0           0.0  
2023-01-06 00:00:00-05:00   87754700        0.0           0.0  
2023-01-09 00:00:00-05:00   70790800        0.0           0.0  


In [2]:
import yfinance as yf

# Symbols to download
tickers = ["AAPL", "MSFT", "GOOGL"]

# Shared date window applied to every symbol
start_date, end_date = "2023-01-01", "2023-12-31"

# Download each symbol's history and write it to its own CSV file
for ticker in tickers:
    stock = yf.Ticker(ticker)
    stock_data = stock.history(start=start_date, end=end_date)
    csv_path = f"{ticker}_stock_data.csv"
    stock_data.to_csv(csv_path)
    print(f"Saved {ticker} stock data.")

Saved AAPL stock data.
Saved MSFT stock data.
Saved GOOGL stock data.


In [3]:
import plotly.graph_objects as go
import plotly.express as px

# Read the saved CSVs back in, using the "Date" column as a parsed index
csv_options = dict(index_col="Date", parse_dates=True)
df_aapl = pd.read_csv("AAPL_stock_data.csv", **csv_options)
df_msft = pd.read_csv("MSFT_stock_data.csv", **csv_options)

# Plot stock price trends
def plot_stock_trend(df1, df2, ticker1, ticker2):
    """Show a line chart comparing the "Close" column of two stock frames.

    Args:
        df1: Frame for the first stock; its index is used as the x-axis and
            its "Close" column as the y-values.
        df2: Frame for the second stock, used the same way.
        ticker1: Legend label for ``df1``; also used in the chart title.
        ticker2: Legend label for ``df2``; also used in the chart title.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()

    # One line trace per (frame, label) pair, added in argument order
    for frame, label in ((df1, ticker1), (df2, ticker2)):
        trace = go.Scatter(x=frame.index, y=frame["Close"], mode="lines", name=label)
        fig.add_trace(trace)

    layout = dict(
        title=f"{ticker1} vs {ticker2} Stock Price Trend",
        xaxis_title="Date",
        yaxis_title="Closing Price",
        template="plotly_dark",
    )
    fig.update_layout(**layout)
    fig.show()

plot_stock_trend(df1=df_aapl, df2=df_msft, ticker1="AAPL", ticker2="MSFT")

# Histogram of AAPL's row-over-row percentage change in "Close"
df_aapl["Daily Return"] = df_aapl["Close"].pct_change()
fig = px.histogram(
    df_aapl,
    x="Daily Return",
    nbins=50,
    title="AAPL Daily Returns",
    template="plotly_dark",
)
fig.show()

In [4]:
import numpy as np

# Moving averages of the AAPL close:
#   SMA_50 - simple mean over a 50-row rolling window
#   EMA_20 - exponentially weighted mean with span=20 (adjust=False)
aapl_close = df_aapl["Close"]
df_aapl["SMA_50"] = aapl_close.rolling(window=50).mean()
df_aapl["EMA_20"] = aapl_close.ewm(span=20, adjust=False).mean()

# Compute Relative Strength Index (RSI)
def compute_rsi(data, window=14):
    """Compute a rolling-mean Relative Strength Index from the "Close" column.

    The gains and losses are averaged with a plain rolling mean over
    ``window`` rows (``min_periods=1``), then combined as
    ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Args:
        data: Frame with a numeric "Close" column.
        window: Number of rows in the rolling averaging window.

    Returns:
        A Series of RSI values carrying the same index as ``data``, so it can
        be assigned straight back as a column. Rows where both the average
        gain and the average loss are zero (including the first row) are NaN;
        rows with a zero average loss and a positive average gain are 100.
    """
    delta = data["Close"].diff(1)

    # Stay in pandas (instead of np.where + pd.Series) so the result keeps
    # data's index; a fresh 0..n-1 index would misalign on column assignment
    # and leave the whole column NaN. The leading NaN delta becomes 0.0 in
    # both series because the comparisons are False for NaN.
    gain = delta.where(delta > 0, 0.0)
    loss = (-delta).where(delta < 0, 0.0)

    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

df_aapl["RSI"] = compute_rsi(df_aapl)

# Chart the close price with both moving averages overlaid as dotted lines
# (the RSI column is computed above but is not drawn on this figure)
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df_aapl.index, y=df_aapl["Close"], mode="lines", name="Close Price")
)
for column, label in (("SMA_50", "SMA 50"), ("EMA_20", "EMA 20")):
    fig.add_trace(
        go.Scatter(
            x=df_aapl.index,
            y=df_aapl[column],
            mode="lines",
            name=label,
            line=dict(dash="dot"),
        )
    )

fig.update_layout(
    title="AAPL Stock with SMA & EMA",
    xaxis_title="Date",
    yaxis_title="Price",
    template="plotly_dark",
)
fig.show()