In [1]:
# Required libraries
import os
import pickle

import pandas as pd
import requests
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from textblob import TextBlob
from xgboost import XGBRegressor

In [2]:
# News API key, read from the NEWS_API_KEY environment variable so that the
# credential is never stored in the notebook itself.
# NOTE(review): a literal key was previously committed on this line; treat that
# key as exposed and revoke/rotate it.
news_api_key = os.environ.get("NEWS_API_KEY", "")
if not news_api_key:
    # Warn rather than raise so the rest of the notebook can still be defined.
    print("WARNING: NEWS_API_KEY is not set; news lookups will not be authenticated.")

# Function to fetch IPO data using yfinance
def get_ipo_data(ticker):
    """Collect a snapshot of fundamentals and the latest close for one ticker.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.

    Returns:
        dict with the keys ``ticker``, ``IPO_date``, ``Market_Cap``,
        ``P/E_Ratio``, ``Dividend_Yield``, ``52_week_range`` and ``Price``.
        A field that is absent from the ticker's info mapping is ``None``;
        ``Price`` is ``None`` when the one-day history contains no rows.
    """
    stock = yf.Ticker(ticker)

    # Read the info mapping a single time instead of once per field; treat a
    # missing/empty mapping as "no fields available".
    info = stock.info or {}

    # The one-day history can come back empty (e.g. unknown symbol), in which
    # case indexing the last row would raise IndexError.
    history = stock.history(period='1d')
    if 'Close' in history and len(history['Close']) > 0:
        price = history['Close'].iloc[-1]
    else:
        price = None

    # NOTE(review): "ipoDate" and "fiftyTwoWeekRange" are looked up as-is;
    # confirm that the info mapping actually provides these keys.
    return {
        "ticker": ticker,
        "IPO_date": info.get("ipoDate"),
        "Market_Cap": info.get("marketCap"),
        "P/E_Ratio": info.get("forwardPE"),
        "Dividend_Yield": info.get("dividendYield"),
        "52_week_range": info.get("fiftyTwoWeekRange"),
        "Price": price,
    }

In [3]:
# Function to fetch news articles related to a company
def fetch_news(company_name, api_key):
    """Query the News API "everything" endpoint for articles about a company.

    Args:
        company_name: Free-text search term; sent as the ``q`` query parameter.
        api_key: News API key; sent in the ``X-Api-Key`` request header.

    Returns:
        The list stored under ``articles`` in the JSON response, or an empty
        list when that key is missing or null (for example on an error payload).
    """
    response = requests.get(
        "https://newsapi.org/v2/everything",
        # Passing the term via params lets requests URL-encode it, so names
        # containing spaces, '&' or '#' no longer corrupt the query string.
        params={"q": company_name},
        # Keep the credential out of the URL (and therefore out of logs).
        headers={"X-Api-Key": api_key},
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=10,
    )
    payload = response.json()
    return payload.get('articles') or []

In [4]:
# Function to perform sentiment analysis on news articles
def analyze_sentiment(articles):
    """Return the mean TextBlob polarity over a collection of article dicts.

    For each article the ``title`` and ``description`` values (a missing or
    ``None`` value counts as an empty string) are joined with a single space
    and scored. The result is the average of those scores, or ``0`` when
    ``articles`` yields nothing.
    """
    polarities = [
        TextBlob(
            (item.get('title') or '') + " " + (item.get('description') or '')
        ).sentiment.polarity
        for item in articles
    ]
    if not polarities:
        return 0
    return sum(polarities) / len(polarities)

# Symbols to pull data for; substitute genuine IPO tickers here as needed
tickers = [
    "AAPL",
    "MSFT",
    "GOOGL",
    "TSLA",
]
# Accumulator for the per-ticker records
historical_ipo_data = list()

In [5]:
# Collect data for each ticker.
# The accumulator is reset at the top of this cell so that re-running it does
# not duplicate rows, and still works if `historical_ipo_data` has since been
# rebound to a DataFrame by a later cell.
historical_ipo_data = []
for ticker in tickers:
    ipo_data = get_ipo_data(ticker)

    # Skip tickers for which the key fundamentals are unavailable
    if ipo_data['Market_Cap'] is None or ipo_data['P/E_Ratio'] is None:
        continue

    # Fetch and analyze news sentiment (0 when no articles came back)
    news_articles = fetch_news(ticker, news_api_key)
    sentiment_score = analyze_sentiment(news_articles) if news_articles else 0

    ipo_data['Sentiment_Score'] = sentiment_score
    # Placeholder target: every row receives the same value, so a model fit on
    # it can only learn that constant. Replace with real data if available.
    ipo_data['IPO_Performance'] = 0.05
    historical_ipo_data.append(ipo_data)


In [6]:
# Convert list of IPO data to DataFrame
historical_ipo_data = pd.DataFrame(historical_ipo_data)

# If no ticker passed the collection filter the frame has no rows or columns,
# and the later column selection would fail with an opaque KeyError. Stop here
# with an explicit message instead.
if historical_ipo_data.empty:
    raise ValueError(
        "No IPO records were collected; check the ticker list and data sources."
    )

In [7]:
# Build the target vector and the feature matrix used for model training
y = historical_ipo_data.loc[:, 'IPO_Performance']  # Replace with actual target variable
X = historical_ipo_data.loc[
    :, ['Market_Cap', 'P/E_Ratio', 'Dividend_Yield', 'Sentiment_Score']
]

In [8]:
# Handle missing values by imputing them
imputer = SimpleImputer(strategy='mean')  # Choose 'mean', 'median', or 'most_frequent' as needed
X_imputed = imputer.fit_transform(X)

In [9]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

In [10]:
# Candidate regressors, keyed by the display name used in the reports below
models = {}
models["Linear Regression"] = LinearRegression()
models["Random Forest"] = RandomForestRegressor(random_state=42, n_estimators=100)
models["XGBoost"] = XGBRegressor(
    n_estimators=100,
    random_state=42,
    objective='reg:squarederror',
)

In [11]:
# Per-model metrics, keyed by model name
results = {}

# Fit every candidate on the training split and score it on the test split
for model_name, model in models.items():
    predictions = model.fit(X_train, y_train).predict(X_test)
    mae, r2 = (
        mean_absolute_error(y_test, predictions),
        r2_score(y_test, predictions),
    )
    results[model_name] = dict([("MAE", mae), ("R² Score", r2)])
    print(f"{model_name} - Mean Absolute Error: {mae}, R² Score: {r2}")

Linear Regression - Mean Absolute Error: 6.938893903907228e-18, R² Score: nan
Random Forest - Mean Absolute Error: 9.020562075079397e-17, R² Score: nan
XGBoost - Mean Absolute Error: 7.450580569168253e-10, R² Score: nan




In [12]:
# Select the best model: highest R² among the models whose R² is defined,
# falling back to the lowest MAE when R² is NaN or tied.
# Every comparison against NaN is False, so a plain max() keyed on R² alone
# returns an order-dependent result whenever NaN scores are present.
def _model_rank(name):
    """Sort key: (R² is defined, R² or 0.0, negated MAE) — larger is better."""
    scores = results[name]
    r2_defined = bool(pd.notna(scores["R² Score"]))
    return (r2_defined, scores["R² Score"] if r2_defined else 0.0, -scores["MAE"])


best_model_name = max(results, key=_model_rank)
best_model = models[best_model_name]
print(f"The best model is {best_model_name} with R² Score of {results[best_model_name]['R² Score']}")

The best model is Linear Regression with R² Score of nan


In [13]:
# Persist the selected model to disk with pickle
with open("best_ipo_predictor_model.pkl", "wb") as model_file:
    pickle.dump(best_model, model_file)

print("Model saved as best_ipo_predictor_model.pkl")

Model saved as best_ipo_predictor_model.pkl


In [14]:
# Read the pickled model back in for later predictions (example).
# Only unpickle files you created yourself: unpickling can execute arbitrary code.
with open("best_ipo_predictor_model.pkl", "rb") as model_file:
    loaded_model = pickle.loads(model_file.read())

In [15]:
# Single-row example input for a prediction; all values are illustrative
_example_row = {
    "Market_Cap": 1e9,
    "P/E_Ratio": 60,
    "Dividend_Yield": 0.01,
    "Sentiment_Score": 0.5,
}
new_data = pd.DataFrame({column: [value] for column, value in _example_row.items()})

In [16]:
# Make prediction.
# The models were fit on a plain ndarray (the imputer's output), so pass an
# ndarray here too, with the columns in the same order used for training.
# Handing over the named DataFrame mismatches the training input and makes
# scikit-learn emit a feature-name warning.
new_features = new_data[['Market_Cap', 'P/E_Ratio', 'Dividend_Yield', 'Sentiment_Score']].to_numpy()
new_prediction = loaded_model.predict(new_features)
print("Predicted IPO Performance:", new_prediction[0])

Predicted IPO Performance: 0.05000000000000001


