In [None]:
import pandas as pd

# Locations of the per-ticker CSV exports, relative to the working directory.
amazon_path, google_path, netflix_path = "Amazon.csv", "GOOG.csv", "NFLX.csv"

# Load the three files, in the same Amazon -> Google -> Netflix order,
# into one DataFrame per ticker.
amazon_df, google_df, netflix_df = map(
    pd.read_csv, (amazon_path, google_path, netflix_path)
)

# Function to clean stock data
def clean_data(df, name):
    """Return a cleaned copy of a stock-price DataFrame.

    Cleaning steps, in order:
      1. drop exact duplicate rows,
      2. drop rows containing any missing value,
      3. parse the 'Date' column to datetime (unparseable values become NaT),
      4. drop rows whose 'Date' could not be parsed,
      5. reset the index to 0..n-1.

    Progress information is printed to stdout; the input frame itself is
    not modified.

    Args:
        df: DataFrame that must contain a 'Date' column.
        name: Label used in the printed progress messages.

    Returns:
        A new DataFrame with the cleaned rows and a fresh integer index.

    Raises:
        KeyError: if `df` has no 'Date' column.
    """
    print(f"\n--- Cleaning {name} Dataset ---")
    rows_before = len(df)

    # Steps 1-2. The explicit copy guarantees that the column assignment
    # below targets an independent frame, which avoids pandas'
    # SettingWithCopyWarning on frames derived from another frame.
    cleaned = df.drop_duplicates().dropna().copy()

    # Step 3. errors='coerce' turns unparseable dates into NaT instead of
    # raising, so that step 4 can filter them out.
    cleaned['Date'] = pd.to_datetime(cleaned['Date'], errors='coerce')

    # Steps 4-5.
    cleaned = cleaned.dropna(subset=['Date']).reset_index(drop=True)

    # Report how many rows were discarded rather than dumping the whole frame.
    print("Rows removed:", rows_before - len(cleaned))
    print(" Cleaning done!")
    print("Shape after cleaning:", cleaned.shape)
    return cleaned

# Run each raw frame through clean_data, in Amazon -> Google -> Netflix order.
amazon_clean, google_clean, netflix_clean = (
    clean_data(raw_frame, label)
    for raw_frame, label in (
        (amazon_df, "Amazon"),
        (google_df, "Google"),
        (netflix_df, "Netflix"),
    )
)

# Preview the first five rows of every cleaned frame, one frame per print.
print(
    amazon_clean.head(),
    google_clean.head(),
    netflix_clean.head(),
    sep="\n",
)

# Order every cleaned frame by its Date column, most recent rows first.
amazon_sorted, google_sorted, netflix_sorted = (
    cleaned_frame.sort_values(by="Date", ascending=False)
    for cleaned_frame in (amazon_clean, google_clean, netflix_clean)
)

# Show the ten most recent rows of each ticker.
print(
    amazon_sorted.head(10),
    google_sorted.head(10),
    netflix_sorted.head(10),
    sep="\n",
)



--- Cleaning Amazon Dataset ---
 Cleaning done!
Shape after cleaning: (6099, 7)

--- Cleaning Google Dataset ---
 Cleaning done!
Shape after cleaning: (2347, 7)

--- Cleaning Netflix Dataset ---
 Cleaning done!
Shape after cleaning: (5422, 7)
        Date      High       Low      Open     Close     Volume  Adj Close
0 1997-12-31  5.041667  4.843750  4.875000  5.020833  4446000.0   5.020833
1 1998-01-02  5.000000  4.796875  5.000000  4.958333  1660800.0   4.958333
2 1998-01-05  4.937500  4.708333  4.885417  4.750000  3590400.0   4.750000
3 1998-01-06  4.875000  4.666667  4.697917  4.838542  4174800.0   4.838542
4 1998-01-07  4.833333  4.692708  4.833333  4.781250  2725200.0   4.781250
        Date       Open       High        Low      Close  Adj Close    Volume
0 2015-01-02  26.378078  26.490770  26.133251  26.168653  26.168653  28951268
1 2015-01-05  26.091366  26.144720  25.582764  25.623152  25.623152  41196796
2 2015-01-06  25.679497  25.738087  24.983908  25.029282  25.029282  579