# Hybrid Mode - Dataset Creation 
we want to use the following structure to train our model:

Input:
- idea_text: "AI-powered e-commerce platform..."
- static_features: [market_size, investment, competition, team_strength]
- historical_stock: [month_1, month_2, ..., month_36]

Target:
- stock_performance (next 12 months): [month_37, month_38, ..., month_48]

---


This should then produce the follwoing output with our model:

Input:
- idea_text: "AI-powered e-commerce platform..."

Optional Input:
- static_features: [market_size, investment, competition, team_strength]

Output:
- Predicted stock performance (next 12 months): [month_1, month_2, ..., month_12]


In [6]:
from pydoc import describe

import yfinance as yf
import pandas as pd
import requests
from accelerate.commands.config.update import description
from bs4 import BeautifulSoup
import random

# Function to fetch stock performance data
def fetch_stock_performance(ticker, months=12):
    try:
        stock = yf.Ticker(ticker)
        hist = stock.history(period="1y", interval="1mo")  # Monthly data for 1 year
        return hist['Close'].tolist()[:months]  # Return closing prices for 12 months
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Function to scrape business description
def fetch_business_description(ticker):
    try:
        url = f"https://finance.yahoo.com/quote/{ticker}/profile?p={ticker}"
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        print(soup.prettify())
        description_t = soup.find('section', {'data-test': 'quote-profile'})
        print(description_t.prettify())
        text = description_t.find('p').text
        print(text)
        return description
    except Exception as e:
        print(f"Error fetching description for {ticker}: {e}")
        print(f"This was the incorrect url: {url}")
        return "No description available."

# Function to generate static features (example)
def generate_static_features():
    market_sizes = random.randint(1, 50) * 1e9  # Market size in billions
    investments = random.randint(100, 2000) * 1e3  # Investment in thousands
    competition = random.uniform(0, 1)  # Competition index
    team_strength = random.randint(1, 20)  # Number of key team members
    return {
        "market_size": market_sizes,
        "investment": investments,
        "competition": competition,
        "team_strength": team_strength
    }

# Main function to create the dataset
def create_real_dataset(tickers, months=12):
    data = []
    for ticker in tickers:
        print(f"Processing {ticker}...")
        stock_performance = fetch_stock_performance(ticker, months)
        if not stock_performance:
            continue
        
        description = fetch_business_description(ticker)
        static_features = generate_static_features()

        # Combine all data into a dictionary
        data.append({
            "ticker": ticker,
            "business_description": description,
            **static_features,
            **{f"month_{i+1}_performance": sp for i, sp in enumerate(stock_performance)}
        })
    
    return pd.DataFrame(data)

# Example usage
if __name__ == "__main__":
    # List of sample tickers (e.g., tech companies)
    tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "META"]
    
    # Create dataset
    dataset = create_real_dataset(tickers, months=12)
    
    # Save to CSV
    dataset.to_csv("real_company_stock_dataset.csv", index=False)
    print("Dataset saved as 'real_company_stock_dataset.csv'")


Processing AAPL...
<!DOCTYPE html>
<html lang="en-us">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta charset="utf-8"/>
  <title>
   Yahoo
  </title>
  <meta content="width=device-width,initial-scale=1,minimal-ui" name="viewport"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <style>
   html {
      height: 100%;
  }
  body {
      background: #fafafc url(https://s.yimg.com/nn/img/sad-panda-201402200631.png) 50% 50%;
      background-size: cover;
      height: 100%;
      text-align: center;
      font: 300 18px "helvetica neue", helvetica, verdana, tahoma, arial, sans-serif;
  }
  table {
      height: 100%;
      width: 100%;
      table-layout: fixed;
      border-collapse: collapse;
      border-spacing: 0;
      border: none;
  }
  h1 {
      font-size: 42px;
      font-weight: 400;
      color: #400090;
  }
  p {
      color: #1A1A1A;
  }
  #message-1 {
      font-weight: bold;
      margin: 0;
  }
  #message-2 {
    