In [49]:
# Import necessary modules and functions
import warnings
import sys
import os
import ta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
sys.path.append('../scripts')
warnings.filterwarnings('ignore')


In [50]:
# Load the raw AMZN price history.
# The path is assembled from components instead of a Windows-style literal:
# a backslash-separated string contains invalid escape sequences (\d, \y, \A)
# and only resolves on Windows, while os.path.join works on every platform.
df = pd.read_csv(os.path.join("..", "data", "yfinance_data", "AMZN_historical_data.csv"))
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,1997-05-15,0.121875,0.125,0.096354,0.097917,0.097917,1443120000,0.0,0.0
1,1997-05-16,0.098438,0.098958,0.085417,0.086458,0.086458,294000000,0.0,0.0
2,1997-05-19,0.088021,0.088542,0.08125,0.085417,0.085417,122136000,0.0,0.0
3,1997-05-20,0.086458,0.0875,0.081771,0.081771,0.081771,109344000,0.0,0.0
4,1997-05-21,0.081771,0.082292,0.06875,0.071354,0.071354,377064000,0.0,0.0


In [51]:
# Frame dimensions as a (rows, columns) tuple
df.shape

(6846, 9)

In [52]:
# Print per-column non-null counts and dtypes, plus the frame's memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6846 entries, 0 to 6845
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          6846 non-null   object 
 1   Open          6846 non-null   float64
 2   High          6846 non-null   float64
 3   Low           6846 non-null   float64
 4   Close         6846 non-null   float64
 5   Adj Close     6846 non-null   float64
 6   Volume        6846 non-null   int64  
 7   Dividends     6846 non-null   float64
 8   Stock Splits  6846 non-null   float64
dtypes: float64(7), int64(1), object(1)
memory usage: 481.5+ KB


In [53]:
# Number of missing values in each column
df.isnull().sum()

Date            0
Open            0
High            0
Low             0
Close           0
Adj Close       0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64

In [54]:
# Tally how often each distinct full row occurs; a count above 1 would mean a duplicated record
df.value_counts()

Date        Open        High        Low         Close       Adj Close   Volume      Dividends  Stock Splits
1997-05-15  0.121875    0.125000    0.096354    0.097917    0.097917    1443120000  0.0        0.0             1
2015-07-17  23.885000   24.271000   23.862499   24.150499   24.150499   98644000    0.0        0.0             1
2015-07-15  23.152000   23.235001   23.010000   23.059500   23.059500   59748000    0.0        0.0             1
2015-07-14  23.115999   23.480000   22.908001   23.278500   23.278500   94724000    0.0        0.0             1
2015-07-13  22.414499   22.893499   22.377001   22.778500   22.778500   79136000    0.0        0.0             1
                                                                                                              ..
2006-06-07  1.670000    1.727000    1.645500    1.688500    1.688500    106076000   0.0        0.0             1
2006-06-06  1.679000    1.690000    1.646500    1.670500    1.670500    85532000    0.0        0.0   

In [55]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0
mean,37.360464,37.801009,36.882561,37.351096,37.351096,138071300.0,0.0,0.003944
std,53.756955,54.366449,53.087051,53.729913,53.729913,138234600.0,0.0,0.246789
min,0.070313,0.072396,0.065625,0.069792,0.069792,9744000.0,0.0,0.0
25%,2.070125,2.120625,2.040156,2.07675,2.07675,65054420.0,0.0,0.0
50%,8.5745,8.66675,8.4665,8.5915,8.5915,102065000.0,0.0,0.0
75%,50.453876,50.723249,50.059875,50.4265,50.4265,156756000.0,0.0,0.0
max,200.089996,201.199997,199.050003,200.0,200.0,2086584000.0,0.0,20.0


In [56]:

# Load only AMZN data
def load_amzn_data(path=None):
    """Load the AMZN price history CSV and parse its ``Date`` column.

    Args:
        path: Optional location of the CSV file. When omitted, the default
            ``../data/yfinance_data/AMZN_historical_data.csv`` (relative to
            the working directory) is used.

    Returns:
        A DataFrame whose ``Date`` column has been converted with
        ``pd.to_datetime``, or ``None`` when the file does not exist.
    """
    if path is None:
        path = os.path.join("..", "data", "yfinance_data", "AMZN_historical_data.csv")

    # Only the read itself is guarded, so an unrelated failure while parsing
    # dates is never mistaken for a missing file.
    try:
        prices = pd.read_csv(path)
    except FileNotFoundError:
        # Best-effort load: report the miss and let the caller test for None.
        print(f"✗ Could not load {os.path.basename(path)}")
        return None

    prices['Date'] = pd.to_datetime(prices['Date'])
    print(f"✓ Loaded AMZN: {len(prices)} records")
    return prices

# Read the AMZN history once; the loader hands back None when the CSV is missing.
amzn_df = load_amzn_data()

# Report the most recent date on record, but only when a frame was actually
# loaded and it carries a 'Date' column.
date_column_present = amzn_df is not None and 'Date' in amzn_df.columns
if date_column_present:
    latest_date = amzn_df['Date'].max()
    print("Latest available date (end_date) for AMZN:")
    print(f"AMZN: {latest_date.strftime('%Y-%m-%d')}")


✓ Loaded AMZN: 6846 records
Latest available date (end_date) for AMZN:
AMZN: 2024-07-30


In [57]:

# Window of interest (both bounds are inclusive)
start_date = '2022-01-01'
end_date = '2023-12-31'

# Keep only the rows whose date falls inside the window.
# NOTE(review): df['Date'] is still the raw string column at this point, so the
# comparison is lexicographic and relies on the ISO 'YYYY-MM-DD' format.
in_window = df['Date'].between(start_date, end_date)
df_filtered = df.loc[in_window]

# Closing price drawn as a single blue line over the selected window
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_filtered['Date'],
        y=df_filtered['Close'],
        name='AMZN Close Price',
        line={'color': 'blue'},
    )
)
fig.update_layout(
    title={
        'text': f'AMZN Stock Price from {start_date} to {end_date}',
        'x': 0.5,  # centre the title horizontally
        'xanchor': 'center',
    },
    xaxis_title='Date',
    yaxis_title='Close Price',
)
fig.show()
