<a href="https://colab.research.google.com/github/ayushjha4wd/Market-Analysis-Tool-for-Space-Commercialization/blob/main/Market_Analysis_Tool_for_Space_Commercialization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install yfinance pandas numpy matplotlib seaborn scikit-learn plotly



In [10]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import plotly.express as px

# Step 1: Data Collection
def fetch_stock_data(symbol, start_date, end_date):
    """Download price data for one ticker via yfinance.

    Args:
        symbol: Ticker symbol handed to ``yf.download``.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        Whatever ``yf.download`` returns for the requested range.
    """
    return yf.download(symbol, start=start_date, end=end_date)

# Ticker and date range used for the download; the result is kept in the
# module-level `stock_data`, which the later steps read.
symbol = "SPCE"
start_date = "2023-01-01"
end_date = "2024-01-01"
stock_data = fetch_stock_data(symbol, start_date, end_date)

# Step 2: Data Preprocessing
def preprocess_data(stock_data, window=5):
    """Add derived price columns and drop rows that contain missing values.

    The input frame is left untouched: the derived columns are added to a
    copy, so callers that keep a reference to the raw data do not see it
    change underneath them.

    Args:
        stock_data: DataFrame with a ``'Close'`` column.
        window: Number of rows in the rolling mean used for the
            ``'Moving Average'`` column. Defaults to 5.

    Returns:
        A new DataFrame with ``'Daily Change'`` (``pct_change`` of Close) and
        ``'Moving Average'`` (rolling mean of Close) added, with every row
        containing a NaN removed.
    """
    # Work on a copy so the caller's DataFrame is not mutated in place.
    prepared = stock_data.copy()
    prepared['Daily Change'] = prepared['Close'].pct_change()
    prepared['Moving Average'] = prepared['Close'].rolling(window=window).mean()
    # The first rows have no previous close / incomplete window and are NaN.
    return prepared.dropna()

# Rebind `stock_data` to the preprocessed frame returned by preprocess_data.
stock_data = preprocess_data(stock_data)

# Step 3: Exploratory Data Analysis
def perform_eda(data):
    """Show two exploratory plots for the preprocessed price data.

    The first figure overlays the ``'Close'`` and ``'Moving Average'``
    columns against the frame's index; the second is a 30-bin histogram
    (with KDE) of the ``'Daily Change'`` column.

    Args:
        data: DataFrame with ``'Close'``, ``'Moving Average'`` and
            ``'Daily Change'`` columns.
    """
    dates = data.index.to_series()
    line_specs = (
        ('Close', 'Close Price'),
        ('Moving Average', '5-Day Moving Average'),
    )

    # Figure 1: price and its moving average on the same axes.
    plt.figure(figsize=(10, 6))
    for column, legend_label in line_specs:
        # squeeze() collapses a single-column selection down to a Series.
        sns.lineplot(x=dates, y=data[column].squeeze(), label=legend_label)
    plt.title("Stock Price and Moving Average")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.show()

    # Figure 2: distribution of the daily percentage changes.
    daily_changes = data['Daily Change']
    sns.histplot(daily_changes, kde=True, bins=30)
    plt.title("Distribution of Daily Price Changes")
    plt.show()

# Step 4: Predictive Modeling
def train_model(data, test_size=0.2, n_estimators=100, random_state=42,
                shuffle=True):
    """Fit a random-forest regressor for ``'Close'`` and print its test errors.

    The features are the ``'Open'``, ``'High'``, ``'Low'``, ``'Volume'`` and
    ``'Moving Average'`` columns. The data is split once into train and test
    parts, the model is fitted on the train part, and MAE / RMSE on the test
    part are printed.

    Args:
        data: DataFrame containing the feature columns and ``'Close'``.
        test_size: Fraction of rows held out for evaluation. Defaults to 0.2.
        n_estimators: Number of trees in the forest. Defaults to 100.
        random_state: Seed used for both the split and the forest.
            Defaults to 42.
        shuffle: Forwarded to ``train_test_split``. The default ``True``
            keeps the original random split; pass ``False`` to split the
            rows in their existing order without shuffling.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    features = data[['Open', 'High', 'Low', 'Volume', 'Moving Average']]
    target = data['Close']
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=random_state,
        shuffle=shuffle,
    )
    # Flatten both targets to 1-D so a single-column 2-D selection of 'Close'
    # is handled the same way for fitting and for scoring.
    y_train = y_train.values.ravel()
    y_test = y_test.values.ravel()

    model = RandomForestRegressor(n_estimators=n_estimators,
                                  random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
    print("Root Mean Squared Error:", np.sqrt(mean_squared_error(y_test, y_pred)))
    return model


# Train on the preprocessed data; train_model prints the test-set MAE and RMSE.
model = train_model(stock_data)

# Step 5: Visualization Dashboard
def create_dashboard(data):
    """Display an interactive Plotly line chart of the closing price.

    Args:
        data: DataFrame whose index becomes a ``"Date"`` column after
            ``reset_index()`` and which has a ``"Close"`` column.
    """
    frame = data.reset_index()

    # Dates are rendered as 'YYYY-MM-DD' strings for the x axis.
    date_labels = frame["Date"].dt.strftime('%Y-%m-%d')
    axis_labels = {"Date": "Date", "Close": "Stock Price"}

    fig = px.line(
        frame,
        x=date_labels,
        y=frame["Close"],
        title="Stock Price Over Time",
        labels=axis_labels,
    )
    fig.update_traces(line={"color": "blue"})
    fig.show()

# Step 6: Scenario Analysis
# Placeholder only: no scenario analysis is computed here, a message is printed.
print("Scenario analysis can include simulations using Monte Carlo or applying new events to the trained model.")


[*********************100%***********************]  1 of 1 completed


Mean Absolute Error: 1.8231607772827148
Root Mean Squared Error: 2.36515656354789
Scenario analysis can include simulations using Monte Carlo or applying new events to the trained model.
