In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Locations of the per-ticker CSV files (edit these if the files live elsewhere)
amazon_path, google_path, netflix_path = "Amazon.csv", "GOOG.csv", "NFLX.csv"

# Load each ticker's price history into its own DataFrame.
# NOTE: the files are expected to be cleaned and date-sorted beforehand;
# nothing here verifies that.
amazon_df = pd.read_csv(filepath_or_buffer=amazon_path)
google_df = pd.read_csv(filepath_or_buffer=google_path)
netflix_df = pd.read_csv(filepath_or_buffer=netflix_path)

# Function to preprocess stock data (only feature extraction + scaling)
def preprocess_data(df: pd.DataFrame, name: str, *, copy: bool = False) -> pd.DataFrame:
    """Add calendar features and min-max scale the price/volume columns.

    The 'Date' column is converted to datetime and four integer-like feature
    columns are derived from it: 'Year', 'Month', 'Day' and 'DayOfWeek'
    (0 = Monday, 6 = Sunday). Whichever of 'Open', 'High', 'Low', 'Close',
    'Adj Close' and 'Volume' are present are then rescaled with a
    ``MinMaxScaler`` fitted on this frame alone (default range [0, 1]), so
    scaled values are NOT comparable across different datasets.

    Args:
        df: Stock data containing at least a 'Date' column.
        name: Human-readable dataset label, used only in progress messages.
        copy: When False (the default, matching the historical behaviour)
            ``df`` is modified in place and the same object is returned.
            When True the caller's frame is left untouched and a processed
            copy is returned.

    Returns:
        The preprocessed DataFrame (``df`` itself unless ``copy=True``).

    Raises:
        KeyError: If ``df`` has no 'Date' column.
    """
    print(f"\n--- Preprocessing {name} Dataset ---")

    if copy:
        df = df.copy()

    # Ensure 'Date' is datetime; entries that cannot be parsed become NaT
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # errors='coerce' hides bad input, so surface it instead of failing silently
    missing_dates = int(df['Date'].isna().sum())
    if missing_dates:
        print(f"Warning: {missing_dates} row(s) have a missing or unparseable 'Date'")

    # Extract new time features
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Day'] = df['Date'].dt.day
    df['DayOfWeek'] = df['Date'].dt.dayofweek  # 0 = Monday, 6 = Sunday

    # Normalize numeric columns
    numeric_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
    available_cols = [col for col in numeric_cols if col in df.columns]
    if available_cols and len(df) > 0:
        scaler = MinMaxScaler()
        df[available_cols] = scaler.fit_transform(df[available_cols])
    else:
        # MinMaxScaler raises on zero features or zero rows; there is
        # nothing meaningful to scale in that case, so skip the step.
        print("Nothing to scale (no rows or no price/volume columns); skipping normalization.")

    print("Preprocessing done!")
    print(df.head())  # show first few rows after preprocessing

    return df

# Run the shared preprocessing step on each ticker's DataFrame
amazon_preprocessed = preprocess_data(df=amazon_df, name="Amazon")
google_preprocessed = preprocess_data(df=google_df, name="Google")
netflix_preprocessed = preprocess_data(df=netflix_df, name="Netflix")





--- Preprocessing Amazon Dataset ---
Preprocessing done!
        Date      High       Low      Open     Close    Volume  Adj Close  \
0 1997-12-31  0.000174  0.000189  0.000189  0.000204  0.034459   0.000204   
1 1998-01-02  0.000163  0.000176  0.000223  0.000187  0.007535   0.000187   
2 1998-01-05  0.000146  0.000152  0.000192  0.000131  0.026188   0.000131   
3 1998-01-06  0.000130  0.000141  0.000142  0.000155  0.031837   0.000155   
4 1998-01-07  0.000119  0.000148  0.000178  0.000140  0.017824   0.000140   

   Year  Month  Day  DayOfWeek  
0  1997     12   31          2  
1  1998      1    2          4  
2  1998      1    5          0  
3  1998      1    6          1  
4  1998      1    7          2  

--- Preprocessing Google Dataset ---
Preprocessing done!
        Date      Open      High       Low     Close  Adj Close    Volume  \
0 2015-01-02  0.011322  0.011602  0.012387  0.010786   0.010786  0.101752   
1 2015-01-05  0.009427  0.009320  0.008645  0.007129   0.007129  0.15