In [1]:
from glob import glob
from os.path import basename, splitext
import pandas as pd

"""
A. Read the stock datasets
B. Calculate 7 days average price of AAPL, GOOGL, META, AMZN
"""
def n_days_average_price(days: int, csv_file: str) -> pd.core.frame.DataFrame:
    # read stock name
    stock_name = splitext(basename(csv_file))[0]
    # generate DataFrame
    df = pd.read_csv(csv_file)
    
    # handle ',' when exists
    for x in ['Price', 'High', 'Low', 'Open']:
        df[x] = df[x].replace(',','', regex=True).astype(float)
    
    # new column name
    n_day_avg_price = f'{stock_name}_moving_avg'
    
    # list daily price
    prices = list(df['Price'])
    dates = list(df['Date'])
    for d in df['Price']:
        # split by latest 7 days
        d_price = prices[:days]
        
        # set row
        df.loc[df['Date'] == dates[0], n_day_avg_price] = sum(d_price) / len(d_price)
        
        # move forward
        prices = prices[1:]
        dates = dates[1:]
        
    # set index
    df = df.set_index('Date')
    
    return df

    
for csv_file in glob('../dataset/stock/*.csv'):
    d = n_days_average_price(7, csv_file)
    
    stock_name = splitext(basename(csv_file))[0]
    print(stock_name)
    
    print(d)
    print('-' * 85)

Nasdaq_100
               Price      Open      High       Low     Vol. Change %  \
Date                                                                   
09/23/2022  11311.24  11393.59  11400.86  11169.83  274.74M   -1.66%   
09/22/2022  11501.65  11579.63  11616.40  11448.90  253.96M   -1.17%   
09/21/2022  11637.79  11900.37  12062.29  11637.19  262.51M   -1.80%   
09/20/2022  11851.54  11853.43  11951.10  11762.07  223.79M   -0.85%   
09/19/2022  11953.28  11752.43  11956.15  11752.43  231.26M    0.77%   
...              ...       ...       ...       ...      ...      ...   
01/07/2022  15592.19  15774.13  15835.00  15526.80  212.83M   -1.10%   
01/06/2022  15765.36  15675.22  15900.60  15608.57  229.89M   -0.04%   
01/05/2022  15771.77  16190.55  16249.23  15763.84  257.52M   -3.12%   
01/04/2022  16279.73  16513.87  16513.87  16151.91  241.07M   -1.35%   
01/03/2022  16501.77  16395.51  16504.13  16306.64  199.76M    1.11%   

            Nasdaq_100_moving_avg  
Date            

In [2]:
"""
C. Output a CSV file which should contain 5 columns: Date, AAPL_moving_avg, GOOGL_moving_avg, \
META_moving_avg, AMZN_moving_avg
"""
output_df = pd.DataFrame()

for csv_file in glob('../dataset/stock/*.csv'):
    d = n_days_average_price(7, csv_file)
    
    stock_name = splitext(basename(csv_file))[0]
    n_day_avg_price = f'{stock_name}_moving_avg'
    
    if 'Date' not in output_df:
        output_df['Date'] = d.index
        output_df = output_df.set_index('Date')
        
    if 'Nasdaq_100' not in stock_name:
        output_df[n_day_avg_price] = d[n_day_avg_price]

output_df.to_csv('../output/stock/moving_avg.csv')

output_df

Unnamed: 0_level_0,AMZN_moving_avg,GOOGL_moving_avg,META_moving_avg,AAPL_moving_avg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
09/23/2022,120.905714,101.154286,145.042857,153.048571
09/22/2022,123.027143,102.048571,146.622857,153.762857
09/21/2022,124.385714,102.645714,148.095714,153.920000
09/20/2022,126.944286,104.300000,151.930000,155.307143
09/19/2022,128.527143,105.658571,155.224286,155.374286
...,...,...,...,...
01/07/2022,165.616000,140.388000,332.698000,176.160000
01/06/2022,166.382500,141.230000,332.925000,177.157500
01/05/2022,167.426667,142.390000,333.080000,178.876667
01/04/2022,168.960000,144.695000,337.535000,180.855000


In [3]:
"""
D. Output a CSV file which should contain 6 columns: Date, NDX_price, AAPL_price, GOOGL_price, 
META_price, AMZN_price
"""
output_df = pd.DataFrame()

for csv_file in glob('../dataset/stock/*.csv'):
    stock_name = splitext(basename(csv_file))[0]
    df = pd.read_csv(csv_file)
    
    for x in ['Price', 'High', 'Low', 'Open']:
        df[x] = df[x].replace(',','', regex=True).astype(float)
    
    if 'Date' not in output_df:
        output_df['Date'] = df['Date']
    
    col_name = f'{stock_name}_price'
    if 'Nasdaq_100'in stock_name:
        col_name = f'NDX_price'
    
    output_df[col_name] = df['Price']

output_df = output_df.set_index('Date')
output_df.to_csv('../output/stock/stock_prices.csv')

output_df

Unnamed: 0_level_0,NDX_price,AMZN_price,GOOGL_price,META_price,AAPL_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
09/23/2022,11311.24,113.78,98.74,140.41,150.43
09/22/2022,11501.65,117.31,100.14,142.82,152.74
09/21/2022,11637.79,118.54,99.28,142.12,153.72
09/20/2022,11851.54,122.19,101.14,146.09,156.90
09/19/2022,11953.28,124.66,103.07,148.02,154.48
...,...,...,...,...,...
01/07/2022,15592.19,162.55,137.02,331.79,172.17
01/06/2022,15765.36,163.25,137.75,332.46,172.00
01/05/2022,15771.77,164.36,137.78,324.17,174.92
01/04/2022,16279.73,167.52,144.40,336.53,179.70
