<a href="https://colab.research.google.com/github/gopiBNK/FAANG/blob/main/FAANG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. IMPORT LIBRARIES
import streamlit as st
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# 2. STREAMLIT CONFIG
st.set_page_config(page_title="FAANG Stock Predictor", layout="wide")
st.title("FAANG Stock Price Prediction (Bias–Variance Aware)")

# 3. LOAD DATA
# Columns the rest of the script reads; checked up front so that a malformed
# CSV produces a readable message instead of a KeyError further down.
REQUIRED_COLUMNS = ["Ticker", "Open", "High", "Low", "Close", "Volume"]
PRICE_COLUMNS = ["Open", "High", "Low", "Close"]

try:
    df = pd.read_csv("faang.csv")
except FileNotFoundError:
    st.error("Dataset file 'faang.csv' was not found in the working directory.")
    st.stop()

missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    st.error(f"Dataset is missing required columns: {missing_columns}")
    st.stop()

st.subheader("Dataset Preview")
st.dataframe(df.head())

# 4. DATA CLEANING
# Drop rows with a missing value in any column used below, then keep only
# rows where every price column is strictly positive.
df = df.dropna(subset=REQUIRED_COLUMNS)
df = df[(df[PRICE_COLUMNS] > 0).all(axis=1)]

st.write("Rows after cleaning:", len(df))
if df.empty:
    # Nothing left to split or fit; stop here rather than failing later
    # inside the training step.
    st.error("No valid rows remain after cleaning; cannot train a model.")
    st.stop()

# 5. VISUALIZATION
st.subheader("Close Price Distribution")
# Draw on an explicit figure/axes pair and close that exact figure afterwards,
# instead of relying on pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(5, 3))
ax.hist(df["Close"], bins=30)
ax.set_title("Close Price Histogram")
fig.tight_layout()
st.pyplot(fig)
plt.close(fig)

# 6. FEATURES & TARGET
X = df[["Ticker", "Open", "High", "Low", "Volume"]]
y = df["Close"]
# 7. TRAIN MODELS (BIAS–VARIANCE AWARE)
def train_best_bias_variance_model(X, y):
    """Fit three linear regressors and return the one with the lowest score.

    Each candidate (LinearRegression, Ridge with alpha=1.0, Lasso with
    alpha=0.01) is wrapped in a pipeline that standard-scales "Open", "High",
    "Low" and "Volume" and one-hot encodes "Ticker". Candidates are fitted on
    an 80% training split (``random_state=42``), evaluated on the remaining
    20%, and cross-validated with 5 folds. Every candidate is logged as its
    own MLflow run in the "FAANG_Bias_Variance_Model" experiment.

    The selection score (lower is better) is::

        test_rmse + abs(train_rmse - test_rmse) + cv_rmse

    Args:
        X: DataFrame with the columns "Ticker", "Open", "High", "Low" and
            "Volume".
        y: Target values aligned with ``X``.

    Returns:
        A tuple ``(best_model, best_name, X_test, y_test, best_test_preds,
        results)`` where ``best_model`` is the fitted pipeline with the lowest
        score, ``best_name`` is its display name, ``X_test``/``y_test`` are the
        held-out split, ``best_test_preds`` are the best model's predictions
        on ``X_test``, and ``results`` is a DataFrame with one row of metrics
        per candidate.

    Raises:
        ValueError: If no candidate produced a finite score (for example when
            every score is NaN), so no model could be selected.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    def build_pipeline(estimator):
        # A fresh ColumnTransformer per candidate: pipelines must not share a
        # transformer instance, otherwise fitting a later candidate refits the
        # preprocessing object held by an earlier (possibly returned) pipeline.
        return Pipeline([
            ("preprocess", ColumnTransformer([
                ("num", StandardScaler(), ["Open", "High", "Low", "Volume"]),
                ("cat", OneHotEncoder(handle_unknown="ignore"), ["Ticker"]),
            ])),
            ("model", estimator),
        ])

    candidates = {
        "Linear Regression": LinearRegression(),
        "Ridge Regression": Ridge(alpha=1.0),
        "Lasso Regression": Lasso(alpha=0.01),
    }

    results = []
    best_score = float("inf")
    # Initialised explicitly so that "nothing selected" is detected below
    # instead of surfacing as an UnboundLocalError at the return statement.
    best_model = None
    best_name = None
    best_test_preds = None

    mlflow.set_experiment("FAANG_Bias_Variance_Model")

    for name, estimator in candidates.items():
        pipeline = build_pipeline(estimator)

        with mlflow.start_run(run_name=name):
            pipeline.fit(X_train, y_train)

            train_preds = pipeline.predict(X_train)
            test_preds = pipeline.predict(X_test)

            train_rmse = np.sqrt(mean_squared_error(y_train, train_preds))
            test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))

            # cross_val_score returns negated RMSE values; flip the sign of
            # the mean to get a positive error.
            # NOTE(review): cross-validation runs on the full X/y, so the
            # held-out test rows also take part in the CV folds — confirm
            # this is intended.
            cv_scores = cross_val_score(
                pipeline,
                X,
                y,
                cv=5,
                scoring="neg_root_mean_squared_error",
            )
            cv_rmse = -cv_scores.mean()

            # Test error, plus the train/test gap, plus the CV error.
            bias_variance_score = (
                test_rmse + abs(train_rmse - test_rmse) + cv_rmse
            )

            logged_metrics = {
                "Train_RMSE": train_rmse,
                "Test_RMSE": test_rmse,
                "CV_RMSE": cv_rmse,
                "Bias_Variance_Score": bias_variance_score,
            }
            for metric_name, metric_value in logged_metrics.items():
                mlflow.log_metric(metric_name, metric_value)

            results.append({
                "Model": name,
                "Train RMSE": train_rmse,
                "Test RMSE": test_rmse,
                "CV RMSE": cv_rmse,
                "Bias–Variance Score": bias_variance_score,
            })

            # Strict "<" keeps the first candidate on ties and never selects
            # a NaN score (comparisons with NaN are always False).
            if bias_variance_score < best_score:
                best_score = bias_variance_score
                best_model = pipeline
                best_name = name
                best_test_preds = test_preds

    if best_model is None:
        raise ValueError(
            "No candidate model produced a finite bias-variance score; "
            "check the input data for invalid values."
        )

    return (
        best_model,
        best_name,
        X_test,
        y_test,
        best_test_preds,
        pd.DataFrame(results),
    )
# 8. RUN TRAINING
# Streamlit re-executes this script from the top on every widget interaction.
# Keep the training outputs in the session so that changing an input or
# pressing "Predict" does not retrain every model and open new MLflow runs.
if "faang_training_outputs" not in st.session_state:
    st.session_state["faang_training_outputs"] = (
        train_best_bias_variance_model(X, y)
    )

best_model, best_name, X_test, y_test, best_preds, results_df = (
    st.session_state["faang_training_outputs"]
)

# 9. DISPLAY RESULTS
st.subheader("Bias–Variance Comparison")
st.dataframe(results_df)

st.success(f"Best Model (Bias–Variance Balanced): {best_name}")

# Hold-out metrics of the selected model, computed on the test split.
test_mae = mean_absolute_error(y_test, best_preds)
test_rmse = np.sqrt(mean_squared_error(y_test, best_preds))
test_r2 = r2_score(y_test, best_preds)

col1, col2, col3 = st.columns(3)
col1.metric("MAE", f"{test_mae:.2f}")
col2.metric("RMSE", f"{test_rmse:.2f}")
col3.metric("R2", f"{test_r2:.3f}")

# 10. ACTUAL vs PREDICTED
# Explicit figure/axes pair; the exact figure is closed after rendering.
fig2, ax2 = plt.subplots(figsize=(5, 3))
ax2.scatter(y_test, best_preds)
ax2.set_xlabel("Actual Close Price")
ax2.set_ylabel("Predicted Close Price")
ax2.set_title("Actual vs Predicted")
fig2.tight_layout()
st.pyplot(fig2)
plt.close(fig2)

# 11. DEPLOY MODEL
st.subheader("Predict Close Price")

ticker = st.selectbox("Ticker", df["Ticker"].unique())
open_p = st.number_input("Open Price", min_value=0.01)
high = st.number_input("High Price", min_value=0.01)
low = st.number_input("Low Price", min_value=0.01)
volume = st.number_input("Volume", min_value=1)

if st.button("Predict"):
    # Reject price combinations that cannot occur in a real trading day
    # before asking the model for a prediction.
    if high < low:
        st.error("High Price cannot be lower than Low Price.")
    elif not low <= open_p <= high:
        st.error("Open Price must lie between Low Price and High Price.")
    else:
        # Single-row frame with the same feature columns used for training.
        input_df = pd.DataFrame([{
            "Ticker": ticker,
            "Open": open_p,
            "High": high,
            "Low": low,
            "Volume": volume,
        }])

        prediction = best_model.predict(input_df)[0]
        st.success(f"Predicted Close Price: {prediction:.2f}")
