In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Locations of the input CSV files (adjust to match your environment)
amazon_path, google_path, netflix_path = "Amazon.csv", "GOOG.csv", "NFLX.csv"

# Load the CSV files (already cleaned & sorted upstream), in the same order
# as the paths above: Amazon, then Google, then Netflix.
amazon_df, google_df, netflix_df = map(
    pd.read_csv,
    (amazon_path, google_path, netflix_path),
)

# Function to preprocess stock data (only feature extraction + scaling)
def preprocess_data(df: pd.DataFrame, name: str) -> pd.DataFrame:
    """Add calendar features and min-max scale the price/volume columns.

    The frame is modified in place and the same object is returned, so the
    caller's original DataFrame also reflects the changes.

    Args:
        df: Stock data containing a 'Date' column. Any of 'Open', 'High',
            'Low', 'Close', 'Adj Close' and 'Volume' that are present are
            rescaled; the ones that are missing are skipped.
        name: Label used only in the progress messages.

    Returns:
        ``df`` itself, with 'Date' converted to datetime, the new 'Year',
        'Month', 'Day' and 'DayOfWeek' columns, and the available numeric
        columns replaced by their MinMaxScaler output.

    Raises:
        KeyError: If ``df`` has no 'Date' column.
    """
    print(f"\n--- Preprocessing {name} Dataset ---")

    # Ensure 'Date' is datetime; values that cannot be parsed become NaT
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # errors='coerce' hides bad input, so surface how many rows were affected
    # (their derived calendar features below will be NaN).
    unparsed = int(df['Date'].isna().sum())
    if unparsed:
        print(f"Warning: {unparsed} row(s) have a missing or unparseable 'Date'")

    # Extract new time features
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Day'] = df['Date'].dt.day
    df['DayOfWeek'] = df['Date'].dt.dayofweek  # 0 = Monday, 6 = Sunday

    # Normalize numeric columns
    numeric_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
    available_cols = [col for col in numeric_cols if col in df.columns]

    # The scaler rejects input with zero columns or zero rows, so only scale
    # when there is actually something to scale.
    if available_cols and not df.empty:
        # A fresh scaler per call: each dataset is scaled against its own range.
        scaler = MinMaxScaler()
        df[available_cols] = scaler.fit_transform(df[available_cols])

    print("Preprocessing done!")
    print(df.head())  # show first few rows after preprocessing

    return df

# Run the preprocessing step on each dataset, in order: Amazon, Google, Netflix
amazon_preprocessed, google_preprocessed, netflix_preprocessed = (
    preprocess_data(frame, label)
    for frame, label in (
        (amazon_df, "Amazon"),
        (google_df, "Google"),
        (netflix_df, "Netflix"),
    )
)




ModuleNotFoundError: No module named 'sklearn'