In [3]:
"""
Simple FT Markets Historical Prices Functions
Based on our working CloudScraper version with Volume support
"""

import cloudscraper
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
from urllib.parse import quote

def search_securities(query):
    """Search for securities on FT Markets"""
    scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'chrome',
            'platform': 'windows',
            'desktop': True
        }
    )
    
    try:
        url = f"https://markets.ft.com/data/searchapi/searchsecurities?query={quote(query)}"
        response = scraper.get(url)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error searching securities: {e}")
        return {"data": {"security": []}}

def get_xid_for_ticker(ticker):
    """Get xid for a ticker symbol from FT Markets"""
    search_results = search_securities(ticker)
    
    if search_results.get('data', {}).get('security'):
        securities = search_results['data']['security']
        
        # Look for exact match first
        for security in securities:
            if ticker.upper() in security.get('name', '').upper():
                return security.get('xid')
        
        # If no exact match, return first result
        if securities:
            return securities[0].get('xid')
    
    return None

def get_historical_prices(xid, start_date, end_date):
    """
    Get historical price data from FT Markets
    
    Args:
        xid: The xid from FT Markets search (e.g., "36276" for Apple)
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format
    
    Returns:
        pandas.DataFrame with columns: Date, Open, High, Low, Close, Volume
    """
    scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'chrome',
            'platform': 'windows',
            'desktop': True
        }
    )
    
    try:
        # Convert dates to FT format
        start_formatted = start_date.replace('-', '%2F')
        end_formatted = end_date.replace('-', '%2F')
        
        url = f"https://markets.ft.com/data/equities/ajax/get-historical-prices?startDate={start_formatted}&endDate={end_formatted}&symbol={xid}"
        
        print(f"Fetching: {url}")
        response = scraper.get(url)
        response.raise_for_status()
        
        data = response.json()
        
        if not data.get('html'):
            print("No HTML data in response")
            return pd.DataFrame()
        
        html_content = data['html']
        if len(html_content) == 0:
            print("Empty HTML content")
            return pd.DataFrame()
        
        return _parse_ft_html_to_df(html_content)
        
    except Exception as e:
        print(f"Error fetching historical prices: {e}")
        return pd.DataFrame()

def _parse_ft_html_to_df(html_content):
    """Parse FT Markets HTML and return DataFrame with Volume"""
    soup = BeautifulSoup(html_content, 'html.parser')
    rows = soup.find_all('tr')
    
    historical_data = []
    
    for row in rows:
        cells = row.find_all('td')
        if len(cells) >= 6:  # Changed from 5 to 6 to include volume
            try:
                # Extract date
                date_cell = cells[0]
                date_spans = date_cell.find_all('span')
                if date_spans:
                    date_text = date_spans[0].get_text().strip()
                else:
                    date_text = date_cell.get_text().strip()
                
                # Parse date
                try:
                    date_obj = datetime.strptime(date_text, '%A, %B %d, %Y')
                except ValueError:
                    try:
                        date_obj = datetime.strptime(date_text, '%B %d, %Y')
                    except ValueError:
                        continue
                
                formatted_date = date_obj.strftime('%Y-%m-%d')
                
                # Extract OHLC prices
                open_price = float(cells[1].get_text().strip().replace(',', ''))
                high_price = float(cells[2].get_text().strip().replace(',', ''))
                low_price = float(cells[3].get_text().strip().replace(',', ''))
                close_price = float(cells[4].get_text().strip().replace(',', ''))
                
                # Extract volume from 6th column
                volume_cell = cells[5]
                volume_spans = volume_cell.find_all('span')
                if volume_spans:
                    # Use the first span with full number like "82,542,718"
                    volume_text = volume_spans[0].get_text().strip().replace(',', '')
                else:
                    volume_text = volume_cell.get_text().strip().replace(',', '')
                
                # Convert to integer
                try:
                    volume = int(float(volume_text))
                except ValueError:
                    volume = 0
                
                historical_data.append({
                    'Date': formatted_date,
                    'Open': open_price,
                    'High': high_price,
                    'Low': low_price,
                    'Close': close_price,
                    'Volume': volume  # Added volume column
                })
                
            except (ValueError, IndexError) as e:
                continue
    
    if historical_data:
        df = pd.DataFrame(historical_data)
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').reset_index(drop=True)
        return df
    else:
        return pd.DataFrame()

def get_simple_historical_prices(xid, start_date, end_date):
    """
    Get historical prices in simple format (like your Quasar app expects)
    
    Returns: List of {"date": "YYYY-MM-DD", "closingPrice": 123.45}
    """
    df = get_historical_prices(xid, start_date, end_date)
    
    if df.empty:
        return []
    
    simple_data = []
    for _, row in df.iterrows():
        simple_data.append({
            "date": row['Date'].strftime('%Y-%m-%d'),
            "closingPrice": row['Close']
        })
    
    return simple_data

def get_simple_historical_prices_with_volume(xid, start_date, end_date):
    """
    Get historical prices with volume in simple format
    
    Returns: List of {"date": "YYYY-MM-DD", "closingPrice": 123.45, "volume": 82542718}
    """
    df = get_historical_prices(xid, start_date, end_date)
    
    if df.empty:
        return []
    
    simple_data = []
    for _, row in df.iterrows():
        simple_data.append({
            "date": row['Date'].strftime('%Y-%m-%d'),
            "closingPrice": row['Close'],
            "volume": row['Volume'] if 'Volume' in df.columns else 0
        })
    
    return simple_data

# Test the functions
if __name__ == "__main__":
    print("=== Testing FT Markets Functions with Volume ===")
    
    # Test 1: Search for Apple
    print("\n1. Searching for Apple...")
    search_results = search_securities("Apple")
    
    apple_xid = None
    if search_results.get('data', {}).get('security'):
        securities = search_results['data']['security']
        for security in securities:
            if 'Apple Inc' in security.get('name', ''):
                apple_xid = security.get('xid')
                print(f"Found Apple Inc: XID = {apple_xid}")
                break
    
    if not apple_xid:
        print("Apple Inc not found, using first result")
        if securities:
            apple_xid = securities[0].get('xid')
    
    # Test 2: Get historical data as DataFrame with Volume
    if apple_xid:
        print(f"\n2. Getting historical data for XID {apple_xid}...")
        df = get_historical_prices(apple_xid, "2024-06-01", "2024-06-30")
        
        if not df.empty:
            print(f"Got {len(df)} records")
            print(f"Columns: {list(df.columns)}")
            print("\nDataFrame:")
            print(df.head())
        else:
            print("No data received")
        
        # Test 3: Get simple format
        print(f"\n3. Getting simple format...")
        simple_data = get_simple_historical_prices(apple_xid, "2024-06-01", "2024-06-30")
        
        if simple_data:
            print(f"Got {len(simple_data)} records in simple format")
            print("First 3 records:")
            for record in simple_data[:3]:
                print(f"  {record}")
        else:
            print("No simple data received")
        
        # Test 4: Get simple format with volume
        print(f"\n4. Getting simple format with volume...")
        simple_with_volume = get_simple_historical_prices_with_volume(apple_xid, "2024-06-01", "2024-06-30")
        
        if simple_with_volume:
            print(f"Got {len(simple_with_volume)} records with volume")
            print("First 3 records:")
            for record in simple_with_volume[:3]:
                print(f"  {record}")
        else:
            print("No volume data received")
    
    print("\n=== Usage ===")
    print("# Search and get XID:")
    print("xid = get_xid_for_ticker('AAPL')")
    print()
    print("# Get DataFrame with Volume:")
    print("df = get_historical_prices(xid, '2024-01-01', '2024-01-31')")
    print()
    print("# Get simple format (for your API):")
    print("data = get_simple_historical_prices(xid, '2024-01-01', '2024-01-31')")
    print()
    print("# Get simple format with volume:")
    print("data = get_simple_historical_prices_with_volume(xid, '2024-01-01', '2024-01-31')")

=== Testing FT Markets Functions with Volume ===

1. Searching for Apple...
Found Apple Inc: XID = 36276

2. Getting historical data for XID 36276...
Fetching: https://markets.ft.com/data/equities/ajax/get-historical-prices?startDate=2024%2F06%2F01&endDate=2024%2F06%2F30&symbol=36276
Got 19 records
Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

DataFrame:
        Date    Open    High     Low   Close    Volume
0 2024-06-03  192.90  194.99  192.52  194.03  50080539
1 2024-06-04  194.64  195.32  193.03  194.35  47471445
2 2024-06-05  195.40  196.90  194.87  195.87  54156785
3 2024-06-06  195.69  196.50  194.17  194.48  41181753
4 2024-06-07  194.65  196.94  194.14  196.89  53103912

3. Getting simple format...
Fetching: https://markets.ft.com/data/equities/ajax/get-historical-prices?startDate=2024%2F06%2F01&endDate=2024%2F06%2F30&symbol=36276
Got 19 records in simple format
First 3 records:
  {'date': '2024-06-03', 'closingPrice': 194.03}
  {'date': '2024-06-04', 'closingPri

In [5]:
from ftgo import search_securities, get_xid, get_historical_prices
results = search_securities("LU2289280112")
xid = get_xid("LU2289280112")
df = get_historical_prices(xid, "01012024", "01022025")
results

Unnamed: 0,xid,name,symbol,asset_class,url
0,653184215,Fundsmith SICAV - Sustainable Equity Fund Clas...,LU2289280112:EUR,Funds,~/funds/tearsheet/summary?s=LU2289280112:EUR


In [6]:
from ftgo import get_xid, get_holdings, get_fund_breakdown

# Get XID for SPY ETF
xid = get_xid('QQQ')

# Get specific data type
sectors = get_holdings(xid, "sector_weights")
top_holdings = get_holdings(xid, "top_holdings")

# Get all data at once
asset_alloc, sectors, regions, holdings = get_holdings(xid, "all")

# Get complete breakdown as dictionary
breakdown = get_fund_breakdown(xid)

In [None]:
import cloudscraper
from bs4 import BeautifulSoup
import pandas as pd

def scrape_etf_profile(url):
    """
    Scrape Profile and Investment data from Financial Times ETF page
    
    Args:
        url (str): The Financial Times ETF URL
    
    Returns:
        pandas.DataFrame: DataFrame with Field and Value columns
    """
    
    # Create scraper
    scraper = cloudscraper.create_scraper()
    
    # Get page
    response = scraper.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find the Profile and Investment section
    profile_section = soup.find('div', {'data-f2-app-id': 'mod-profile-and-investment-app'})
    
    if not profile_section:
        return pd.DataFrame()
    
    # Extract all table data
    data = []
    tables = profile_section.find_all('table')
    
    for table in tables:
        rows = table.find_all('tr')
        for row in rows:
            th = row.find('th')
            td = row.find('td')
            
            if th and td:
                field = th.get_text(strip=True)
                value = td.get_text(separator=' ', strip=True)
                value = ' '.join(value.split())  # Clean whitespace
                data.append({'Field': field, 'Value': value})
    
    return pd.DataFrame(data)

# Usage
if __name__ == "__main__":
    url = "https://markets.ft.com/data/etfs/tearsheet/summary?s={xid}"
    df = scrape_etf_profile(url)
    print(df)

                         Field  \
0                    Fund type   
1    Investment style (stocks)   
2             Income treatment   
3         Morningstar category   
4                   IMA sector   
5                  Launch date   
6               Price currency   
7                     Domicile   
8                         ISIN   
9         Manager & start date   
10           Pricing frequency   
11                   Fund size   
12            Share class size   
13              Ongoing charge   
14              Initial charge   
15           Max annual charge   
16                 Exit charge   
17     Min. initial investment   
18  Min. additional investment   
19     Min. regular investment   
20                      UK ISA   
21          Available for sale   

                                                Value  
0                                               SICAV  
1          Market Cap: Large Investment Style: Growth  
2                                        Accumula

In [2]:
from ftgo import get_xid, get_fund_profile, get_fund_stats, get_available_fields

xid = get_xid('QQQ')

# Explore what's available first
fields = get_available_fields(xid)
print("Available fields:", fields)

# Get all data as dictionary
stats = get_fund_stats(xid)

# Safe access to fields (returns None if field doesn't exist)
inception = stats.get('Inception date')
expense_ratio = stats.get('Ongoing charge')
total_assets = stats.get('Total net assets')

# Or iterate through all available data
for field, value in stats.items():
    print(f"{field}: {value}")

# Search for fields containing specific terms
fees = search_profile_field(xid, 'fee')
dates = search_profile_field(xid, 'date')

ImportError: cannot import name 'get_available_fields' from 'ftgo.infos' (c:\Users\gohibiki\GitHub\ftgo\ftgo\infos.py)