In [1]:
# Use the interactive "widget" matplotlib backend for figures in this notebook.
%matplotlib widget
# Load the autoreload extension and set it to mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from os.path import abspath
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

# Everything print()-ed by later cells goes to this text report instead of the
# notebook output area.
file_path = abspath('../../outputs/Apple_5-Year.txt')

# Remember the notebook's own stream the first time this cell runs, so it can be
# put back later with `sys.stdout = notebook_stdout`.
if 'notebook_stdout' not in globals():
    notebook_stdout = sys.stdout

# Re-running the cell must not leak the handle opened by the previous run.
if 'report_file' in globals() and not report_file.closed:
    report_file.close()

# buffering=1 selects line buffering: each printed line reaches the disk right
# away, so the report is complete even though the handle stays open for the
# rest of the session. The encoding is pinned so the report does not depend on
# the platform default.
report_file = open(file_path, "w", encoding="utf-8", buffering=1)
sys.stdout = report_file

In [3]:
# Load Apple's 5-year daily price history; the first CSV column (the trade
# date) becomes the index. Rows keep the order they have in the file.
aapl = pd.read_csv(abspath('../../data/Apple/Apple_5-Year.csv'), index_col=0)

# Strip "$", "," and ")" from every string cell so the price columns can be
# parsed as numbers. The pattern is a raw string because "\$" is not a valid
# escape sequence in an ordinary string literal and raises a warning on current
# Python versions.
aapl = aapl.replace(r'[\$,)]', '', regex=True)

# Convert every column to float
aapl = aapl.astype(float)

# Parse the index labels as datetimes
aapl.index = pd.to_datetime(aapl.index)

aapl.head()

Unnamed: 0_level_0,Close/Last,Volume,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-02-14,168.88,86185530.0,167.37,169.58,166.56
2022-02-11,168.64,98670690.0,172.33,173.08,168.04
2022-02-10,172.12,90865900.0,174.14,175.48,171.55
2022-02-09,176.28,71285040.0,176.05,176.65,174.9
2022-02-08,174.83,74829220.0,171.73,175.35,171.43


In [None]:
# Horizons, in trading days, used for the multi-day returns.
TRADING_DAYS_PER_MONTH = 21
TRADING_DAYS_PER_YEAR = 252

# pct_change() and rolling() compare each row with the rows *before* it, so the
# closes must be in chronological order for "previous row" to mean "earlier
# date" (the source file lists the newest date first). Work on a date-sorted
# copy; assigning the results back aligns them on the date index, so aapl's own
# row order is left untouched. This relies on the dates being unique.
close = aapl['Close/Last'].sort_index()

# Daily return: (today's close - yesterday's close) / yesterday's close
aapl['daily_return'] = close.pct_change()

# Monthly return: change in close over the trailing ~21 trading days
aapl['monthly_return'] = close.pct_change(TRADING_DAYS_PER_MONTH)

# Annual return: change in close over the trailing ~252 trading days
aapl['annual_return'] = close.pct_change(TRADING_DAYS_PER_YEAR)

# 50-day moving average of the close (mean of the current and 49 prior sessions)
aapl['50_day_moving_average'] = close.rolling(50).mean()

# 200-day moving average of the close (mean of the current and 199 prior sessions)
aapl['200_day_moving_average'] = close.rolling(200).mean()

In [None]:
# Date on which the monthly return column reaches its maximum
print("Highest returning month: " + str(aapl['monthly_return'].idxmax()))

# Flag and count the days whose close is above the open
aapl['Close>Open'] = aapl['Close/Last'] > aapl['Open']
print("Number of days where closing price is higher than the opening price: " + str(aapl['Close>Open'].sum()))


# Flag and count the days whose close is below the open
aapl['Close<Open'] = aapl['Close/Last'] < aapl['Open']
print("Number of times the index at the close is lower than the open: " + str(aapl['Close<Open'].sum()))

# Day-over-day change in the close. diff() subtracts the previous row, so the
# closes are sorted chronologically first; otherwise, with the newest date
# first, every change would have the wrong sign and up/down days would be
# swapped. The assignment aligns on the date index, so row order in aapl is
# unaffected.
aapl['Up&Down'] = aapl['Close/Last'].sort_index().diff()

# Up days: the close rose versus the previous session
print("Number of up days: " + str((aapl['Up&Down'] > 0).sum()))

# Down days: the close fell versus the previous session
print("Number of down days: " + str((aapl['Up&Down'] < 0).sum()))

In [None]:
import datetime as dt

# Day of week for every row's date: Monday=0 ... Friday=4.
weekday = aapl.index.dayofweek

# Volume keyed by date for Fridays and Mondays. Selecting rows with a boolean
# mask avoids integer keys on the date-indexed Volume series, which pandas no
# longer treats as positions.
fridays = aapl.loc[weekday == 4, 'Volume'].to_dict()
mondays = aapl.loc[weekday == 0, 'Volume'].to_dict()

# Total traded volume on each of the two weekdays
friday_total = sum(fridays.values())
monday_total = sum(mondays.values())
print("Sum of fridays: " + str(friday_total))
print("Sum of mondays: " + str(monday_total))

# Report which weekday has the larger total (ties are reported as Monday)
if friday_total > monday_total:
    print("Friday sum is greater")
else:
    print("Monday sum is greater")

# Correlation between traded volume and the daily return
correlation = aapl['Volume'].corr(aapl['daily_return'])
print("Correlation between the volume and return: " + str(correlation))

In [None]:
# Write the frame, with every column it currently holds, to Apple.csv in the
# data folder. This is a different file from the Apple_5-Year.csv that was
# read, so the source data is not overwritten.
enriched_csv = abspath('../../data/Apple/Apple.csv')
aapl.to_csv(enriched_csv)