In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
import os

# Silence every warning for the rest of the kernel session: no category,
# message or module filter is passed, so nothing is exempt.
# NOTE(review): this also hides deprecation notices from the imported
# libraries -- consider narrowing to the specific categories that are noisy.
warnings.filterwarnings('ignore')

# Function to read and show data
def load_and_show_data(input_file, raw_dir='../raw_data'):
    """Read one raw CSV and print a quick preview of its contents.

    Parameters
    ----------
    input_file : str
        Name of the CSV file inside ``raw_dir`` (e.g. ``"dataAMZN.csv"``).
    raw_dir : str or Path, optional
        Directory that holds the raw CSV files. Defaults to ``'../raw_data'``,
        which is resolved relative to the current working directory.

    Returns
    -------
    pandas.DataFrame
        The frame exactly as parsed by ``pd.read_csv``.
    """
    data = pd.read_csv(Path(raw_dir) / input_file)

    print(f"Showing data for {input_file}")
    print(data.head())  # Display first few rows for quick review

    # DataFrame.info() writes its summary itself and returns None, so it must
    # not be wrapped in print() -- doing so emits a stray "None" line.
    data.info()

    return data

# Function to remove unwanted columns (Dividends and Stock Splits)
def clean_data(input_file, columns_to_drop=('Dividends', 'Stock Splits')):
    """Drop unwanted columns from one raw CSV and save the result.

    The file is read from ``../raw_data/<input_file>`` and the cleaned frame is
    written to ``../clean_data/clean_<input_file>`` (both relative to the
    current working directory). The output directory is created if missing.

    Parameters
    ----------
    input_file : str
        Name of the CSV file inside ``../raw_data``.
    columns_to_drop : iterable of str, optional
        Column names to remove. Defaults to ``('Dividends', 'Stock Splits')``.
        Names that are absent from the file are skipped instead of raising.

    Returns
    -------
    pandas.DataFrame
        The frame with the requested columns removed.
    """
    data = pd.read_csv(f'../raw_data/{input_file}')

    # errors='ignore' keeps the step usable on files that lack one of the
    # columns, where a plain drop() would raise KeyError.
    cleaned_data = data.drop(columns=list(columns_to_drop), errors='ignore')

    clean_file_path = Path(f'../clean_data/clean_{input_file}')
    clean_file_path.parent.mkdir(parents=True, exist_ok=True)  # Create output directory on first run
    cleaned_data.to_csv(clean_file_path, index=False)
    print(f"Cleaned data saved to {clean_file_path}")

    return cleaned_data





In [2]:
# Ticker symbols to process, in processing order
tickers = [
    "AMZN", "AAPL", "NVDA", "MSFT", "GOOG",
    "META", "TSLA", "WMT", "JPM", "NFLX",
]

In [3]:
# Step 1: print a preview (head + info) of every ticker's raw CSV
for ticker in tickers:
    load_and_show_data("data{}.csv".format(ticker))

Showing data for dataAMZN.csv
                        Date       Open       High        Low      Close  \
0  2016-01-04 00:00:00-05:00  32.814499  32.886002  31.375500  31.849501   
1  2016-01-05 00:00:00-05:00  32.342999  32.345501  31.382500  31.689501   
2  2016-01-06 00:00:00-05:00  31.100000  31.989500  31.015499  31.632500   
3  2016-01-07 00:00:00-05:00  31.090000  31.500000  30.260500  30.396999   
4  2016-01-08 00:00:00-05:00  30.983000  31.207001  30.299999  30.352501   

      Volume  Dividends  Stock Splits  
0  186290000        0.0           0.0  
1  116452000        0.0           0.0  
2  106584000        0.0           0.0  
3  141498000        0.0           0.0  
4  110258000        0.0           0.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2243 entries, 0 to 2242
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          2243 non-null   object 
 1   Open          2243 non-null   flo

In [4]:
# Step 2: strip the unwanted columns from each ticker's CSV and write the
# cleaned copy to disk
for ticker in tickers:
    clean_data("data{}.csv".format(ticker))

Cleaned data saved to ..\clean_data\clean_dataAMZN.csv
Cleaned data saved to ..\clean_data\clean_dataAAPL.csv
Cleaned data saved to ..\clean_data\clean_dataNVDA.csv
Cleaned data saved to ..\clean_data\clean_dataMSFT.csv
Cleaned data saved to ..\clean_data\clean_dataGOOG.csv
Cleaned data saved to ..\clean_data\clean_dataMETA.csv
Cleaned data saved to ..\clean_data\clean_dataTSLA.csv
Cleaned data saved to ..\clean_data\clean_dataWMT.csv
Cleaned data saved to ..\clean_data\clean_dataJPM.csv
Cleaned data saved to ..\clean_data\clean_dataNFLX.csv
