In [32]:
pip install langchain langchain-anthropic langchain-community firecrawl-py python-dotenv pydantic

You should consider upgrading via the '/Users/vladbalabash/Projects/nasa-exodoo/ai-rnd-storage/.venv/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [56]:
import os
from getpass import getpass

from dotenv import find_dotenv, load_dotenv

# SECURITY: API keys must never be written into the notebook itself. The keys
# that used to be hard-coded in this cell have been exposed and must be revoked
# and re-issued with the respective providers.
#
# Keys are taken from the process environment or from a local `.env` file
# (keep that file out of version control). If a key is still missing, it is
# requested interactively so that it never ends up in the saved notebook.
load_dotenv(find_dotenv())

for secret_name in ("ANTHROPIC_API_KEY", "FIRECRAWL_API_KEY"):
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass(f"Enter {secret_name}: ")

In [75]:
import os
from langchain_anthropic import ChatAnthropic
from langchain_community.document_loaders import FireCrawlLoader
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv() before the key is read below.
load_dotenv(find_dotenv())

# Chat model shared by every chain in this notebook.
llm = ChatAnthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],
    model="claude-3-5-sonnet-20240620",
    max_tokens=1024,
    temperature=0.2,
)

In [58]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt text for cleaning up one scraped page; `{content}` is its only placeholder.
content_parsing_prompt = """
                            Parse the content of the page, clean it up and extract the main information about the planet.
                            Here is the HTML-content of the page:

                            {content}
                        """

# Template object built from the text above (template passed positionally).
content_parsing_template = PromptTemplate.from_template(content_parsing_prompt)

def get_planet_description(url: str, summarize: bool = False):
    """Scrape one page with Firecrawl and return its content.

    Args:
        url: Address of the page to scrape.
        summarize: When False (the default) the scraped document is returned
            unchanged, which is what this function has always returned. When
            True, the document's text is additionally run through
            ``content_parsing_template | llm | StrOutputParser()`` and the
            model's cleaned-up text is returned instead.

    Returns:
        The first document produced by the loader, or, with
        ``summarize=True``, the string produced by the parsing chain.

    Raises:
        ValueError: If the loader returns no documents for ``url``.
    """
    loader = FireCrawlLoader(
        api_key=os.environ["FIRECRAWL_API_KEY"],
        url=url,
        mode="scrape"
    )

    documents = loader.load()
    if not documents:
        # Fail with the offending URL instead of a bare IndexError on [0].
        raise ValueError(f"Firecrawl returned no content for {url!r}")
    content = documents[0]

    if not summarize:
        return content

    # This step used to sit after an unconditional `return` and never ran;
    # it is now reachable, but only on request.
    parsing_chain = content_parsing_template | llm | StrOutputParser()
    return parsing_chain.invoke({"content": content.page_content})


In [76]:
from enum import Enum
from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional 

# Enum for star types based on their spectral classification.
# The class also derives from str, so every member is a string equal to its
# value (e.g. 'G-type'). The trailing comments give the typical appearance of
# each class.
class StarType(str, Enum):
    O = 'O-type'  # Hot, blue stars
    B = 'B-type'  # Blue stars
    A = 'A-type'  # White-blue stars
    F = 'F-type'  # Yellow-white stars
    G = 'G-type'  # Yellow stars (e.g., the Sun)
    K = 'K-type'  # Orange stars
    M = 'M-type'  # Red dwarfs (cool, faint stars)

# Enum for exoplanet types.
# The class also derives from str, so every member is a string equal to its
# human-readable value (e.g. 'Gas Giant').
class ExoplanetType(str, Enum):
    GAS_GIANT = 'Gas Giant'
    NEPTUNE_LIKE = 'Neptune-like'
    SUPER_EARTH = 'Super-Earth'
    TERRESTRIAL = 'Terrestrial'


# Pydantic model for one publication about an exoplanet.
# Neither field declares a default, so both must be supplied.
class Publication(BaseModel):
    link: HttpUrl = Field(description="URL link to the publication")
    description: str = Field(description="Brief description of the publication")


# Pydantic model for an exoplanet, including its host star, visual assets and
# related publications. It is the target schema of the output parser.
#
# Every Optional[...] field defaults to None. In Pydantic v2 an Optional
# annotation without a default is still a *required* key, so a response that
# simply left out an unknown measurement failed validation; with the default
# the key may be omitted or null.
#
# NOTE(review): several measurements (mass, radius, ...) are typed as strings
# while others are floats; the types are left as they were for compatibility.
class ExoplanetModel(BaseModel):
    name: str = Field(description="Name of the exoplanet")
    description: str = Field(description="A brief description of the exoplanet")
    mass: Optional[str] = Field(None, description="Mass of the exoplanet in Earth masses (M⊕)")
    radius: Optional[str] = Field(None, description="Radius of the exoplanet in Earth radii (R⊕)")
    orbital_period: Optional[str] = Field(None, description="Orbital period in Earth days")
    semi_major_axis: Optional[str] = Field(None, description="Semi-major axis of the orbit in AU (astronomical units)")
    eccentricity: Optional[str] = Field(None, description="Orbital eccentricity (0 = circular orbit)")
    temperature: float = Field(description="Estimated surface temperature of the exoplanet in Kelvin")
    gravity: Optional[str] = Field(None, description="Gravity index on the exoplanet relative to Earth's gravity")
    density: Optional[str] = Field(None, description="Density of the exoplanet in g/cm³")
    habitability: bool = Field(description="Whether the planet is considered habitable (True/False)")
    surface_conditions: str = Field(description="Description of surface conditions (e.g., rocky, gaseous, water presence)")
    age: float = Field(description="Age of the exoplanet in billion years (Gyr)")
    distance_from_earth: float = Field(description="Distance from Earth in light years (ly)")
    travel_time: float = Field(description="Estimated travel time to the planet using current spacecraft in years")
    discovered_method: str = Field(description="Method used to discover the exoplanet (e.g., Transit, Radial Velocity)")

    # Exoplanet type
    exoplanet_type: ExoplanetType = Field(description="Type of the exoplanet (Gas Giant, Neptune-like, Super-Earth, Terrestrial)")

    # Host star properties
    star_name: str = Field(description="Name of the host star")
    star_type: StarType = Field(description="Enum value representing the star's type (spectral class)")
    star_mass: Optional[str] = Field(None, description="Mass of the star in solar masses (Msun)")
    star_radius: Optional[str] = Field(None, description="Radius of the star in solar radii (Rsun)")
    star_temperature: float = Field(description="Temperature of the star in Kelvin")
    star_age: float = Field(description="Age of the star in billion years (Gyr)")
    # Visual assets
    planet_texture: Optional[str] = Field(None, description="Image file path of the visual texture of the planet")
    star_texture: Optional[str] = Field(None, description="Image file path of the visual texture of the host star")
    surface_photos: Optional[List[str]] = Field(None, description="List of image file paths for surface photos")
    locals_portrait: Optional[str] = Field(None, description="Image file path of the locals' portrait")
    flora_photos: Optional[List[str]] = Field(None, description="List of image file paths for possible flora")
    camp_photo: Optional[str] = Field(None, description="Image file path for a possible camp photo")
    background: Optional[str] = Field(None, description="Image file path for the background of the planet")
    # Publications related to the exoplanet
    publications: Optional[List[Publication]] = Field(None, description="List of publications with links and descriptions")


In [20]:
# Prompt text for the structured-extraction chain.
# Placeholders: {planet_information} receives the scraped material for one
# planet, {format_instructions} receives the output parser's instructions.
# TODO: Add detailed prompt
prompt_template_string = """
Parse the following content and generate a description of the planet.

{planet_information}

{format_instructions}
"""

In [77]:
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

# Parser that turns the model's reply into an ExoplanetModel instance.
planet_parser = PydanticOutputParser(pydantic_object=ExoplanetModel)

# Extraction prompt with the parser's format instructions already filled in,
# so callers only have to supply `planet_information`.
prompt = PromptTemplate.from_template(
    prompt_template_string,
    partial_variables={
        "format_instructions": planet_parser.get_format_instructions(),
    },
)

def to_json_file(data: "list[ExoplanetModel]", filename):
    """Write ``data`` to ``filename`` as a single JSON array.

    The previous version wrote each model's JSON back-to-back with no
    separator, which is not a valid JSON document once there is more than
    one item. The items are now collected into one list and dumped together.

    Args:
        data: Models to serialize; each must provide ``model_dump(mode="json")``.
        filename: Path of the file to create or overwrite.
    """
    import json  # local import so this cell does not depend on another cell's imports

    records = [item.model_dump(mode="json") for item in data]
    # Explicit UTF-8 and ensure_ascii=False keep symbols such as "⊕" readable
    # regardless of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4, ensure_ascii=False)

def parse_planet_resource(urls: List[str]):
    """Scrape every URL of one planet and extract a structured record from them.

    Args:
        urls: Pages that all describe the same planet.

    Returns:
        Whatever ``planet_parser`` produces from the model's reply.
    """
    # One scraped result per URL, in the order the URLs were given.
    scraped_pages = []
    for page_url in urls:
        scraped_pages.append(get_planet_description(page_url))

    extraction_chain = prompt | llm | planet_parser
    payload = {"planet_information": scraped_pages}
    return extraction_chain.invoke(payload)


In [78]:
# Source pages grouped by planet: each inner list holds the URLs for one planet.
# TODO: Add urls grouped by planet - each planet is a list of urls
resources = [
    [
        "https://science.nasa.gov/image-detail/amf-pia20690/",
        "https://exoplanet.eu/catalog/hip_73786_b--10107/",
        # "https://iopscience.iop.org/article/10.3847/1538-3881/ac082a",
        # "https://iopscience.iop.org/article/10.3847/1538-4365/aaf6af",
        # "https://iopscience.iop.org/article/10.3847/1538-3881/ab3467"
    ],
    [
        "https://exoplanet.eu/catalog/hat_p_18_ab--723/",
        # "https://academic.oup.com/mnras/article/529/4/4768/7610909",
        # "https://academic.oup.com/mnras/article/527/2/3183/7379625",
        "https://iopscience.iop.org/article/10.3847/2041-8213/ac9977"
    ],
]

# One parsed record per URL group, then show them and persist them to disk.
planets = [parse_planet_resource(planet_urls) for planet_urls in resources]
print(planets)
to_json_file(planets, "planets.json")

[ExoplanetModel(name='HIP 73786 b', description='HIP 73786 b is a confirmed exoplanet discovered in 2010. It orbits a K-type star at a large distance of 1260 AU. The planet was detected using the imaging method and has a calculated temperature of 914 K. It is also known by the alternate names GJ 576 b and ULAS J150457.65+053800.8.', mass=None, radius=None, orbital_period=None, semi_major_axis='1260.0', eccentricity=None, temperature=914.0, gravity=None, density=None, habitability=False, surface_conditions='Unknown, likely gaseous due to its large orbit and temperature', age=1.6, distance_from_earth=19.0, travel_time=285000.0, discovered_method='Imaging', exoplanet_type=<ExoplanetType.GAS_GIANT: 'Gas Giant'>, star_name='HIP 73786', star_type=<StarType.K: 'K-type'>, star_mass='0.64', star_radius='0.596', star_temperature=4062.0, star_age=1.6, planet_texture=None, star_texture=None, surface_photos=None, locals_portrait=None, flora_photos=None, camp_photo=None, background=None, publication