<a href="https://colab.research.google.com/github/gopiBNK/FAANG/blob/main/FAANG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import mlflow
import mlflow.sklearn
import streamlit as st

# Read the FAANG dataset from the CSV file in the working directory.
df = pd.read_csv("FAANG.csv")

# Bare expression carried over from the notebook: it only displays the header
# when it is the last statement of a cell and has no effect in a script.
df.columns

# Trim leading/trailing whitespace from every column name so that later
# lookups such as df['Close'] do not depend on how the CSV header was typed.
stripped_names = df.columns.str.strip()
df.columns = stripped_names

# Print the first rows as a quick sanity check of the load.
preview = df.head()
print(preview)

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Illustrative sample of the column layout this section works with. It is a
# plain dict and is NOT used by the pipeline below, which operates on the
# `df` loaded from the CSV.
data = {
    "Company": ["Apple", "Amazon", "Google", "Meta", "Netflix"],
    "Ticker": ["AAPL", "AMZN", "GOOGL", "META", "NFLX"],
    "Date": ["2023-01-01", "2023-02-01", "2023-03-01", "2023-04-01", "2023-05-01"],
    "Analyst Recommendation": ["Strong Buy", "Buy", "Hold", "Sell", "Strong Sell"]
}

# Label-encode Company and Ticker with one encoder per column. Refitting a
# single encoder on Ticker would discard the classes learned for Company,
# making the Company codes impossible to decode with inverse_transform.
company_encoder = LabelEncoder()
ticker_encoder = LabelEncoder()
df['Company'] = company_encoder.fit_transform(df['Company'])
df['Ticker'] = ticker_encoder.fit_transform(df['Ticker'])

# Backward compatibility: `label_encoder` keeps referring to the encoder that
# was fitted last (Ticker), exactly as before.
label_encoder = ticker_encoder

# Convert Date to a numeric value: parse to datetime first, then take the
# proleptic Gregorian ordinal (an integer day count) of each timestamp.
df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = df['Date'].map(lambda x: x.toordinal())

# Map the analyst recommendation labels onto an ordinal 1-5 scale
# (1 = most bullish, 5 = most bearish). Labels that are not keys of this
# mapping become NaN.
recommendation_mapping = {
    "Strong Buy": 1,
    "Buy": 2,
    "Hold": 3,
    "Sell": 4,
    "Strong Sell": 5
}
df['Analyst Recommendation'] = df['Analyst Recommendation'].map(recommendation_mapping)

# Display the converted DataFrame
print(df)

# Data Cleaning and EDA
# Handle missing values: fill gaps with the per-column median. Medians are
# computed over numeric columns only, because pandas >= 2.0 raises a TypeError
# when asked for the median of a non-numeric column.
df.fillna(df.median(numeric_only=True), inplace=True)

# Convert Date to datetime format. When Date holds ordinal day numbers (as
# produced by Timestamp.toordinal), pd.to_datetime would read the integers as
# nanoseconds since the Unix epoch and collapse every row onto 1970-01-01, so
# ordinals are converted back with Timestamp.fromordinal instead.
if pd.api.types.is_numeric_dtype(df['Date']):
    df['Date'] = df['Date'].astype(int).map(pd.Timestamp.fromordinal)
else:
    df['Date'] = pd.to_datetime(df['Date'])

# Remove outliers using IQR. Only numeric columns take part in the test;
# datetime and text columns have no meaningful 1.5 * IQR fence.
numeric_cols = df.select_dtypes(include='number').columns
Q1 = df[numeric_cols].quantile(0.25)
Q3 = df[numeric_cols].quantile(0.75)
IQR = Q3 - Q1
below_fence = df[numeric_cols] < (Q1 - 1.5 * IQR)
above_fence = df[numeric_cols] > (Q3 + 1.5 * IQR)
# Drop every row that is outside the fences in at least one numeric column.
df = df[~(below_fence | above_fence).any(axis=1)]

# Correlation heatmap
# numeric_only=True keeps non-numeric columns (e.g. a datetime Date column)
# out of the correlation matrix; without it pandas >= 2.0 raises on them.
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Line chart for stock price trends
# The x-axis is the Date column (the previous empty column name '' raised a
# KeyError), matching the 'Date' axis label below.
plt.figure(figsize=(10, 6))
plt.plot(df['Date'], df['Close'], label='Close Price')
plt.title('Stock Price Trend')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

# Model Development
# The Open, High, Low and Volume columns are the predictors; Close is the
# value the model learns to predict.
feature_columns = ['Open', 'High', 'Low', 'Volume']
X = df[feature_columns]
y = df['Close']

# Hold out the last 20% of the rows as the test set. shuffle=False keeps the
# rows in their existing order instead of sampling them at random.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=False, test_size=0.2
)

# Learn the min/max scaling from the training rows only, then apply the same
# scaling to both splits so no test-set information leaks into the scaler.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit an ordinary least-squares linear regression on the scaled features.
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict closing prices for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Score the predictions: mean absolute error, root mean squared error and R^2.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}, RMSE: {rmse}, R2: {r2}')

# MLflow Integration
# Record the evaluation metrics and the fitted model in a run of the
# "FAANG Stock Price Prediction" experiment.
mlflow.set_experiment("FAANG Stock Price Prediction")

with mlflow.start_run():
    # Log metrics
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("R2", r2)

    # Log model. Keep the returned ModelInfo: its model_uri is the address
    # under which MLflow actually stored the model artifact.
    model_info = mlflow.sklearn.log_model(model, "linear_regression_model")

# Streamlit Deployment
# Create a Streamlit app
st.title("FAANG Stock Price Prediction")

# User inputs: one numeric field per model feature.
open_price = st.number_input("Open Price")
high_price = st.number_input("High Price")
low_price = st.number_input("Low Price")
volume = st.number_input("Volume")

# Load the model back through the URI of the logged artifact. The bare string
# "linear_regression_model" would be resolved as a local filesystem path,
# which log_model never creates, so loading by that name fails.
model = mlflow.sklearn.load_model(model_info.model_uri)

# Predict button
if st.button("Predict"):
    # Wrap the inputs in a one-row DataFrame carrying the training column
    # names so the scaler sees the same feature names it was fitted with.
    user_row = pd.DataFrame(
        [[open_price, high_price, low_price, volume]],
        columns=['Open', 'High', 'Low', 'Volume'],
    )
    # Apply the training-time scaling before predicting.
    features = scaler.transform(user_row)
    prediction = model.predict(features)
    st.success(f"Predicted Closing Price: ${prediction[0]:.2f}")