In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
import yfinance as yf

In [2]:
# Date range shared by every download in this cell.
START_DATE = "2020-01-01"
END_DATE = "2025-08-13"

# auto_adjust is passed explicitly instead of relying on the library default.
# NOTE(review): the recorded output of this cell shows a warning pointing at each
# yf.download call -- presumably yfinance's FutureWarning about the auto_adjust
# default having changed to True. Pinning the value keeps the data the same and
# makes the choice visible; confirm against the installed yfinance version.
df_apple_to_csv = yf.download("AAPL", start=START_DATE, end=END_DATE, auto_adjust=True)
df_microsoft_to_csv = yf.download("MSFT", start=START_DATE, end=END_DATE, auto_adjust=True)
df_amazon_to_csv = yf.download("AMZN", start=START_DATE, end=END_DATE, auto_adjust=True)
df_nvidia_to_csv = yf.download("NVDA", start=START_DATE, end=END_DATE, auto_adjust=True)


apple_path = 'datasets/apple.csv'
microsoft_path = 'datasets/microsoft.csv'
amazon_path = 'datasets/amazon.csv'
nvidia_path = 'datasets/nvidia.csv'

# to_csv does not create missing directories, so make sure the target folder
# exists before writing (keeps a fresh checkout runnable top to bottom).
for csv_path in (apple_path, microsoft_path, amazon_path, nvidia_path):
    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)

df_apple_to_csv.to_csv(apple_path)
df_microsoft_to_csv.to_csv(microsoft_path)
df_amazon_to_csv.to_csv(amazon_path)
df_nvidia_to_csv.to_csv(nvidia_path)

# The frames used by the rest of the notebook are the ones read back from disk,
# not the downloaded objects; the following cell relies on that flat CSV layout
# (a leading 'Price' column and two extra leading rows).
df_apple = pd.read_csv(apple_path)
df_microsoft = pd.read_csv(microsoft_path)
df_amazon = pd.read_csv(amazon_path)
df_nvidia = pd.read_csv(nvidia_path)

  df_apple_to_csv = yf.download("AAPL", start="2020-01-01", end="2025-08-13")
[*********************100%***********************]  1 of 1 completed
  df_microsoft_to_csv = yf.download("MSFT", start="2020-01-01", end="2025-08-13")
[*********************100%***********************]  1 of 1 completed
  df_amazon_to_csv = yf.download("AMZN", start="2020-01-01", end="2025-08-13")
[*********************100%***********************]  1 of 1 completed
  df_nvidia_to_csv = yf.download("NVDA", start="2020-01-01", end="2025-08-13")
[*********************100%***********************]  1 of 1 completed


In [3]:
def _promote_price_to_date(frame):
    """Return ``frame`` with its 'Price' column re-exposed as a trailing 'Date' column.

    The values of 'Price' are copied into a new 'Date' column (appended last),
    then the 'Price' column and the rows labelled 0 and 1 are removed. The
    remaining rows keep their original index labels.

    NOTE(review): rows 0 and 1 are presumably the extra header rows produced when
    the downloaded frame was written to CSV -- verify against the files on disk.
    """
    return frame.assign(Date=frame['Price']).drop(index=[0, 1], columns='Price')


df_apple = _promote_price_to_date(df_apple)
df_microsoft = _promote_price_to_date(df_microsoft)
df_amazon = _promote_price_to_date(df_amazon)
df_nvidia = _promote_price_to_date(df_nvidia)

In [4]:
#INITIAL
# Indicator parameters; engineer_features reads these as module-level names.
window = 14              # rows in the rolling mean behind 'Average Gain' / 'Average Loss'
short_period = 12        # span of the 'Short EMA'
long_period = 26         # span of the 'Long EMA'
signal_line_period = 9   # span of the EMA applied to 'MACD' to get 'Signal Line'

# Ticker labels and their frames are matched by position in the driver loop,
# so both lists must stay in the same order.
stocks = ['AAPL', 'AMZN', 'MSFT', 'NVDA']
dfs = [df_apple, df_amazon, df_microsoft, df_nvidia]

In [5]:
#CONVERTING TO CORRECT FEATURE TYPES
def convert_type(df):
    """Cast the price, volume and date columns of ``df`` to proper dtypes, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Close', 'High', 'Low', 'Open', 'Volume' and 'Date' columns.
        It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience so the call can be
        chained. Callers that ignore the return value keep working.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    ValueError
        If a value cannot be parsed as a number or a date.
    """
    for column in ('Close', 'High', 'Low', 'Open'):
        df[column] = df[column].astype(float)

    # Explicit 64-bit: plain `int` follows the platform integer, which is only
    # 32 bits wide on Windows with older NumPy and too small for share volumes
    # above ~2.1 billion.
    df['Volume'] = df['Volume'].astype('int64')
    df['Date'] = pd.to_datetime(df['Date'])
    return df

In [6]:
#FEATURE ENGINEERING
def engineer_features(df):
    """Add RSI, MACD and calendar columns to ``df`` in place and return it.

    Reads the module-level settings ``window``, ``short_period``,
    ``long_period`` and ``signal_line_period``. Requires a numeric 'Close'
    column and a datetime 'Date' column.

    Columns added, in this order: 'Price Change', 'Gain', 'Loss',
    'Average Gain', 'Average Loss', 'RS', 'RSI', 'Short EMA', 'Long EMA',
    'MACD', 'Signal Line', 'MACD Histogram', 'Year', 'Month', 'Week',
    'Year-Month'.

    Returns the same ``df`` object that was passed in.
    """
    closes = df['Close']
    dates = df['Date']

    # Row-to-row change in close; the first row has no predecessor and gets 0.
    delta = closes.diff().fillna(0)
    df['Price Change'] = delta

    # Split the change into its positive part and the magnitude of its negative part.
    df['Gain'] = delta.where(delta > 0, 0)
    df['Loss'] = (-delta).where(delta < 0, 0)

    # Simple rolling means; rows without a full window are filled with 0.
    for source, target in (('Gain', 'Average Gain'), ('Loss', 'Average Loss')):
        df[target] = df[source].rolling(window=window).mean().fillna(0)

    # Where both averages are 0 the ratio is 0/0 = NaN, so 'RS' and 'RSI' are
    # NaN on those rows (this includes the zero-filled warm-up rows).
    df['RS'] = df['Average Gain'].div(df['Average Loss'])
    df['RSI'] = 100 - 100 / (df['RS'] + 1)

    # MACD: difference of two EMAs of the close, plus an EMA of that difference.
    fast_ema = closes.ewm(span=short_period, adjust=False).mean()
    slow_ema = closes.ewm(span=long_period, adjust=False).mean()
    df['Short EMA'] = fast_ema
    df['Long EMA'] = slow_ema

    macd = fast_ema - slow_ema
    df['MACD'] = macd

    signal = macd.ewm(span=signal_line_period, adjust=False).mean()
    df['Signal Line'] = signal
    df['MACD Histogram'] = macd - signal

    # Calendar breakdown of the date.
    df['Year'] = dates.dt.year
    df['Month'] = dates.dt.month
    # Despite its name, 'Week' holds the day-of-week number from `.dt.weekday`,
    # not a week-of-year.
    df['Week'] = dates.dt.weekday
    df['Year-Month'] = dates.dt.to_period('M').astype(str)

    return df

In [7]:
#DROPPING NULL AND UNIMPORTANT COLUMNS/ROWS
def drop_features_and_na(df):
    """Return a copy of ``df`` without the 'Gain'/'Loss' columns and without NaN rows.

    The two helper columns are removed first; afterwards every row that still
    contains a missing value in any remaining column is discarded. The input
    frame itself is left untouched.
    """
    return df.drop(columns=['Gain', 'Loss']).dropna()

In [8]:
#PRINTING SEABORN PLOTS
# def print_plots(df, stock):
#     for year in years:
#         sns.relplot(data=df.loc[df['Year'] == year], x='Date', y='RSI', kind='line', height=7, aspect=2.74)
#         plt.title(f'RSI trend for year {year} Stock {stock}')
#         plt.savefig(f"plots/{stock}{year}.jpg", dpi=300, bbox_inches="tight")
#         # plt.show()
#         print(f'RSI trend for year {year} Stock {stock} downloaded!')
        
        # sns.relplot(data=df.loc[df['Year'] == year], x='Date', y='MACD', kind='line')
        # plt.title(f'MACD trend for year {year} Stock {stock}')
        # plt.figure(figsize=(20,10))
        # plt.show()

# def print_plots(df, stock):
#     for year in years:
#         fig = px.line(data_frame=df.loc[df['Year'] == year], x='Date', y='RSI', title=f"RSI trend for year {year} Stock {stock}"f"RSI trend for year {year} Stock {stock}")
#         # plt.title(f'RSI trend for year {year} Stock {stock}')
#         # plt.savefig(f"plots/{stock}{year}.jpg", dpi=300, bbox_inches="tight")
#         fig.update_layout(width=1920, height=1080)
#         fig.write_html(f"../../frontend/public/{stock}{year}.html")
#         # plt.show()
#         print(f'RSI trend for year {year} Stock {stock} downloaded!')
        
#         # sns.relplot(data=df.loc[df['Year'] == year], x='Date', y='MACD', kind='line')
#         # plt.title(f'MACD trend for year {year} Stock {stock}')
#         # plt.figure(figsize=(20,10))
#         # plt.show()

In [9]:
#1D INTERVAL
def print_1d_plot(df, stock):
    """Save an interactive line chart of 'Close' over 'Date' for ``stock``.

    Every row of ``df`` becomes one point. The figure is sized 1920x1080 and
    written to ``../../frontend/public/{stock}1D.html``; a confirmation line is
    printed once the file has been written. Returns nothing.
    """
    output_file = f"../../frontend/public/{stock}1D.html"
    # update_layout hands back the figure it was called on, so the calls chain.
    (
        px.line(data_frame=df, x='Date', y='Close')
        .update_layout(width=1920, height=1080)
        .write_html(output_file)
    )
    print(f'1 Day Interval trend for {stock}, downloaded!')

In [10]:

#1W INTERVAL
def print_1w_plot(df, stock):
    """Save a weekly-interval line chart of the closing price for ``stock``.

    The rows of ``df`` are grouped by calendar week and each week contributes a
    single point: its last available close, placed at the date of that row.
    Previously every row was plotted, which made this chart identical to the
    1-day one.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'Date' column (datetime, or anything ``pd.to_datetime`` accepts)
        and a 'Close' column. The frame is not modified.
    stock : str
        Label used in the output file name and the confirmation message.

    The figure is sized 1920x1080 and written to
    ``../../frontend/public/{stock}1W.html``. Returns nothing.
    """
    # Work on a date-sorted copy so "last row of the week" means "latest date".
    dated = df.assign(Date=pd.to_datetime(df['Date'])).sort_values('Date')
    # Named differently from any column so the key cannot be confused with one.
    week_key = dated['Date'].dt.to_period('W').rename('_week')
    weekly = dated.groupby(week_key)[['Date', 'Close']].last().reset_index(drop=True)

    fig = px.line(data_frame=weekly, x='Date', y='Close')
    fig.update_layout(width=1920, height=1080)
    fig.write_html(f"../../frontend/public/{stock}1W.html")
    print(f'1 Week Interval trend for {stock}, downloaded!')

In [11]:
#1M INTERVAL
def print_1m_plot(df, stock):
    """Save a monthly-interval line chart of the closing price for ``stock``.

    Each distinct 'Year-Month' value contributes a single point: the last close
    recorded for that month. Previously every row was drawn against its month
    label, which stacked all of a month's closes on one x position.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'Year-Month' and 'Close' columns. If a 'Date' column is present
        the rows are ordered by it first, so "last" means "latest"; otherwise
        the existing row order is used. The frame is not modified.
    stock : str
        Label used in the output file name and the confirmation message.

    The figure is sized 1920x1080 and written to
    ``../../frontend/public/{stock}1M.html``. Returns nothing.
    """
    ordered = df.sort_values('Date') if 'Date' in df.columns else df
    # groupby sorts its keys, so the months come out in ascending label order.
    monthly = ordered.groupby('Year-Month')['Close'].last().reset_index()

    fig = px.line(data_frame=monthly, x='Year-Month', y='Close')
    fig.update_layout(width=1920, height=1080)
    fig.write_html(f"../../frontend/public/{stock}1M.html")
    print(f'1 Month Interval trend for {stock}, downloaded!')

In [12]:
#1Y INTERVAL
def print_1y_plot(df, stock):
    """Save a yearly-interval line chart of the closing price for ``stock``.

    Each distinct 'Year' value contributes a single point: the last close
    recorded for that year. Previously every row was drawn against its year,
    which stacked all of a year's closes on one x position.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'Year' and 'Close' columns. If a 'Date' column is present the
        rows are ordered by it first, so "last" means "latest"; otherwise the
        existing row order is used. The frame is not modified.
    stock : str
        Label used in the output file name and the confirmation message.

    The figure is sized 1920x1080 and written to
    ``../../frontend/public/{stock}1Y.html``. Returns nothing.
    """
    ordered = df.sort_values('Date') if 'Date' in df.columns else df
    # groupby sorts its keys, so the years come out in ascending order.
    yearly = ordered.groupby('Year')['Close'].last().reset_index()

    fig = px.line(data_frame=yearly, x='Year', y='Close')
    fig.update_layout(width=1920, height=1080)
    fig.write_html(f"../../frontend/public/{stock}1Y.html")
    print(f'1 Year Interval trend for {stock}, downloaded!')

In [13]:
def print_plots(df, stock):
    """Write the daily, monthly and yearly charts for ``stock``, in that order.

    ``df`` and ``stock`` are forwarded unchanged to each plotting function.
    The weekly chart (print_1w_plot) is deliberately left out of the sequence.
    Returns nothing.
    """
    for render in (print_1d_plot, print_1m_plot, print_1y_plot):
        render(df, stock)

In [14]:
# Run the full pipeline for every frame and preview the result.
for i, df in enumerate(dfs):
    # Both of these modify the frame stored in `dfs` in place.
    convert_type(df)
    df = (
        engineer_features(df)
        .pipe(drop_features_and_na)
        .drop(columns='Close')
    )

    print('---------------------------------------------------------------------------')
    print(f"{stocks[i]}:")
    print(df.head())

    # Plotting is disabled here. The plotting functions read the 'Close'
    # column, which has already been dropped from `df` at this point.
    # print_plots(df, stocks[i])

---------------------------------------------------------------------------
AAPL:
         High        Low       Open     Volume       Date  Price Change  \
15  77.281827  76.634574  76.941293  101832400 2020-01-22      0.272919   
16  77.177986  76.233668  76.781908  104472000 2020-01-23      0.369522   
17  78.088477  76.685286  77.344620  146537600 2020-01-24     -0.222206   
18  75.296586  73.632564  74.883600  161940000 2020-01-27     -2.260559   
19  76.897837  75.398041  75.497062  162234000 2020-01-28      2.110840   

    Average Gain  Average Loss        RS        RSI  Short EMA   Long EMA  \
15      0.508209      0.208905  2.432728  70.868654  75.349468  74.270925   
16      0.534603      0.208905  2.559075  71.902813  75.618518  74.480359   
17      0.534603      0.174406  3.065283  75.401468  75.811989  74.657820   
18      0.493722      0.335874  1.469959  59.513504  75.627918  74.654687   
19      0.644496      0.311552  2.068664  67.412525  75.796910  74.808145   

    