In [1]:
import pandas as pd

from langchain.chat_models import init_chat_model
from typing import Optional, List, Dict
from typing_extensions import Annotated, TypedDict
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.tools import tool

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Read the paper-entity table from `entities.csv` (path is relative to the
# kernel's working directory) and preview its first rows.
df = pd.read_csv('entities.csv')
df.head()

Unnamed: 0,title,author,id,data,code_link,packages_used,task,field,abstract,models,...,methodology,metrics,limitations,reproducibility,strengths,weaknesses,reuse_potential,impact,metrics_list,scores
0,Generation of Modern Satellite Data from Galil...,"Harim Lee, Eunsu Park, Yong-Jae Moon",DOI: 10.3847/1538-4357/abd498,['Mount Wilson Observatory (MWO) sunspot drawi...,None provided,"['NumPy', 'Keras', 'TensorFlow', 'SunPy']",Generation of modern satellite data (magnetogr...,"Solar Physics (Sunspots, Solar Magnetic Fields...",This study presents a deep learning-based appr...,['Conditional Generative Adversarial Network (...,...,"The study uses a pix2pix model, a type of cond...",['Total Unsigned Magnetic Flux (TUMF) Correlat...,"[""The model does not successfully generate act...",4,['Innovative approach to bridge historical sun...,['Limited ability to reproduce detailed corona...,['The methodology can be extended to other his...,8,[{'Total Unsigned Magnetic Flux (TUMF) Correla...,"[0.82, 0.74, 0.75, 0.56, 0.65, 0.67, 0.7, 0.76..."
1,Visual Explanation of a Deep Learning Solar Fl...,"Kangwoo Yi, Yong-Jae Moon, Daye Lim, Eunsu Par...",DOI: 10.3847/2041-8213/abe94a,['Full-disk magnetograms from SOHO/MDI and SDO...,None provided,"['PyTorch', 'NumPy', 'Matplotlib']",Solar flare prediction,Solar flares,The study presents a visual explanation of a d...,['Convolutional Neural Network (CNN) with dens...,...,The study uses a CNN-based deep learning model...,"['True Skill Statistics (TSS)', 'Accuracy (ACC...",['Projection effects near the solar limb may a...,3,['First application of guided backpropagation ...,['Limited by the resolution and noise in magne...,['The methodology can be extended to other sol...,8,"[{'True Skill Statistics (TSS)': 0.65}, {'Accu...","[0.65, 0.83, 0.65, 0.61]"
2,Improved AI-generated Solar Farside Magnetogra...,"Hyun-Jin Jeong, Yong-Jae Moon, Eunsu Park, Har...",10.3847/2041-8213/ac911f,"['SDO/AIA EUV images (304, 193, 171 Å)', 'SDO/...",https://github.com/JeongHyunJin/Pix2PixCC,"['PyTorch', 'NumPy', 'Matplotlib', 'SciPy', 'A...",Generation of solar farside magnetograms,Solar Physics,The paper presents an improved artificial inte...,"['Pix2PixCC (Generator, Discriminator, Inspect...",...,"The study introduces the Pix2PixCC model, whic...","['Pixel-to-pixel CC (full disk)', 'Pixel-to-pi...",['Physical quantities based on pixel-to-pixel ...,4,['Improved accuracy in generating solar farsid...,['Dependence on the quality and availability o...,['The AI-generated magnetograms can be used fo...,8,"[{'Pixel-to-pixel CC (full disk)': 0.88}, {'Pi...","[0.88, 0.91, 0.7, 0.99, 0.94, 0.94, 0.9, 0.94,..."
3,Construction of global IGS-3D electron density...,"Eun-Young Ji, Yong-Jae Moon, Young-Sil Kwak, K...",10.1016/j.jastp.2024.106370,['International Global Navigation Satellite Sy...,None provided,['PyTorch'],Global 3-D electron density modeling,Ionospheric electron density,The study presents a deep learning-based metho...,['Multi-Layer Perceptron (MLP)'],...,The study employs a deep learning approach usi...,"['Root Mean Square Error (RMSE) (Jicamarca)', ...",['Dependence on IRI-2016 model data for traini...,4,['Improved accuracy in electron density predic...,['Model performance may be affected by the lim...,['The model can be adapted for real-time globa...,8,[{'Root Mean Square Error (RMSE) (Jicamarca)':...,"[0.37, 0.22, 0.34]"
4,Generation of He I 1083 nm Images from SDO AIA...,"Jihyeon Son, Junghun Cha, Yong-Jae Moon, Harim...",10.3847/1538-4357/ac133e,"['SDO/AIA 19.3 nm images', 'SDO/AIA 30.4 nm im...",None provided,"['SunPy', 'TensorFlow or PyTorch (implied by d...",Image-to-image translation (generating He I 10...,Solar Physics,The study presents a deep learning-based metho...,['pix2pixHD (conditional Generative Adversaria...,...,"The study utilized the pix2pixHD model, a cond...","['Correlation Coefficient (CC)', 'Root Mean Sq...",['Synthetic images may miss fine details in fi...,3,['Successful generation of He I 1083 nm images...,['Difficulty in capturing fine details of fila...,['The pix2pixHD model can be adapted for other...,8,"[{'Correlation Coefficient (CC)': 0.88}, {'Roo...","[0.88, 9.49]"


In [51]:
# Create the chat model handle via LangChain's `init_chat_model`, requesting the
# 'mistral-large-latest' model from the 'mistralai' provider.
# NOTE(review): presumably requires Mistral credentials to be available to the
# kernel (e.g. through the environment) — confirm before running on a fresh kernel.
llm = init_chat_model('mistral-large-latest', model_provider='mistralai')

In [15]:
# Show the column labels of the loaded frame.
df.columns

Index(['title', 'author', 'id', 'data', 'code_link', 'packages_used', 'task',
       'field', 'abstract', 'models', 'hybrid_model', 'multimodal',
       'baselines', 'preprocessing', 'citations', 'approach_used',
       'methodology', 'metrics', 'limitations', 'reproducibility', 'strengths',
       'weaknesses', 'reuse_potential', 'impact', 'metrics_list', 'scores'],
      dtype='object')

In [16]:
# Work on a copy of `df` without the `metrics_list` column; `drop` returns a new
# frame, so `df` itself is left unchanged.
entities_df = df.drop(columns=['metrics_list'])

# Keep `code_link` only when it looks like a URL (contains 'http'); anything else
# becomes None. The `isinstance` guard matters because a missing cell is read by
# pandas as a float NaN, and `'http' in nan` would raise TypeError.
entities_df['code_link'] = [
    link if isinstance(link, str) and 'http' in link else None
    for link in entities_df['code_link']
]

In [17]:
# Display the cleaned frame: `metrics_list` removed, and `code_link` set to None
# wherever the original value did not contain 'http'.
entities_df

Unnamed: 0,title,author,id,data,code_link,packages_used,task,field,abstract,models,...,approach_used,methodology,metrics,limitations,reproducibility,strengths,weaknesses,reuse_potential,impact,scores
0,Generation of Modern Satellite Data from Galil...,"Harim Lee, Eunsu Park, Yong-Jae Moon",DOI: 10.3847/1538-4357/abd498,['Mount Wilson Observatory (MWO) sunspot drawi...,,"['NumPy', 'Keras', 'TensorFlow', 'SunPy']",Generation of modern satellite data (magnetogr...,"Solar Physics (Sunspots, Solar Magnetic Fields...",This study presents a deep learning-based appr...,['Conditional Generative Adversarial Network (...,...,['Image-to-image translation using pix2pix (cG...,"The study uses a pix2pix model, a type of cond...",['Total Unsigned Magnetic Flux (TUMF) Correlat...,"[""The model does not successfully generate act...",4,['Innovative approach to bridge historical sun...,['Limited ability to reproduce detailed corona...,['The methodology can be extended to other his...,8,"[0.82, 0.74, 0.75, 0.56, 0.65, 0.67, 0.7, 0.76..."
1,Visual Explanation of a Deep Learning Solar Fl...,"Kangwoo Yi, Yong-Jae Moon, Daye Lim, Eunsu Par...",DOI: 10.3847/2041-8213/abe94a,['Full-disk magnetograms from SOHO/MDI and SDO...,,"['PyTorch', 'NumPy', 'Matplotlib']",Solar flare prediction,Solar flares,The study presents a visual explanation of a d...,['Convolutional Neural Network (CNN) with dens...,...,['Visual explanation using guided backpropagat...,The study uses a CNN-based deep learning model...,"['True Skill Statistics (TSS)', 'Accuracy (ACC...",['Projection effects near the solar limb may a...,3,['First application of guided backpropagation ...,['Limited by the resolution and noise in magne...,['The methodology can be extended to other sol...,8,"[0.65, 0.83, 0.65, 0.61]"
2,Improved AI-generated Solar Farside Magnetogra...,"Hyun-Jin Jeong, Yong-Jae Moon, Eunsu Park, Har...",10.3847/2041-8213/ac911f,"['SDO/AIA EUV images (304, 193, 171 Å)', 'SDO/...",https://github.com/JeongHyunJin/Pix2PixCC,"['PyTorch', 'NumPy', 'Matplotlib', 'SciPy', 'A...",Generation of solar farside magnetograms,Solar Physics,The paper presents an improved artificial inte...,"['Pix2PixCC (Generator, Discriminator, Inspect...",...,['Deep learning-based image-to-image translati...,"The study introduces the Pix2PixCC model, whic...","['Pixel-to-pixel CC (full disk)', 'Pixel-to-pi...",['Physical quantities based on pixel-to-pixel ...,4,['Improved accuracy in generating solar farsid...,['Dependence on the quality and availability o...,['The AI-generated magnetograms can be used fo...,8,"[0.88, 0.91, 0.7, 0.99, 0.94, 0.94, 0.9, 0.94,..."
3,Construction of global IGS-3D electron density...,"Eun-Young Ji, Yong-Jae Moon, Young-Sil Kwak, K...",10.1016/j.jastp.2024.106370,['International Global Navigation Satellite Sy...,,['PyTorch'],Global 3-D electron density modeling,Ionospheric electron density,The study presents a deep learning-based metho...,['Multi-Layer Perceptron (MLP)'],...,"['Deep learning-based inversion method', 'Mult...",The study employs a deep learning approach usi...,"['Root Mean Square Error (RMSE) (Jicamarca)', ...",['Dependence on IRI-2016 model data for traini...,4,['Improved accuracy in electron density predic...,['Model performance may be affected by the lim...,['The model can be adapted for real-time globa...,8,"[0.37, 0.22, 0.34]"
4,Generation of He I 1083 nm Images from SDO AIA...,"Jihyeon Son, Junghun Cha, Yong-Jae Moon, Harim...",10.3847/1538-4357/ac133e,"['SDO/AIA 19.3 nm images', 'SDO/AIA 30.4 nm im...",,"['SunPy', 'TensorFlow or PyTorch (implied by d...",Image-to-image translation (generating He I 10...,Solar Physics,The study presents a deep learning-based metho...,['pix2pixHD (conditional Generative Adversaria...,...,['Image-to-image translation using pix2pixHD.'...,"The study utilized the pix2pixHD model, a cond...","['Correlation Coefficient (CC)', 'Root Mean Sq...",['Synthetic images may miss fine details in fi...,3,['Successful generation of He I 1083 nm images...,['Difficulty in capturing fine details of fila...,['The pix2pixHD model can be adapted for other...,8,"[0.88, 9.49]"
5,Can we properly determine differential emissio...,"Junmu Youn, Harim Lee, Hyun-Jin Jeong, Jin-Yi ...",DOI: 10.1051/0004-6361/202450000 (hypothetical...,"['SDO/AIA datasets (2011-2021)', 'Solar Orbite...",https://github.com/ianan/demreg/,"['PyTorch', 'Astropy', 'aiapy', 'SunPy', 'NumPy']",Differential Emission Measure (DEM) determination,"Solar Physics, Solar Corona, Extreme Ultraviol...",This study investigates whether differential e...,['Pix2PixCC (based on Pix2PixHD)'],...,['Deep learning-based image-to-image translati...,The study utilized a Pix2PixCC deep learning m...,"['Pearson CC (94 Å)', 'Pearson CC (131 Å)', 'P...",['Intercalibration discrepancies between SDO/A...,4,['Successful generation of five EUV channels f...,['Dependence on intercalibration accuracy betw...,['The methodology can be adapted for future so...,8,"[0.87, 0.96, 0.95, 0.95, 0.93, 0.5, 1.26, 38.9..."
6,Fast Reconstruction of 3D Density Distribution...,"Sumiaya Rahman, Seungheon Shin, Hyun-Jin Jeong...",10.3847/1538-4357/acc538,['MAS thermodynamic model data (photospheric m...,,"['SunPy', 'PyTorch', 'NumPy']",3D coronal electron density distribution recon...,"Solar corona, solar wind, space weather",This study presents the first attempt to gener...,['pix2pixHD'],...,"['Image-to-image translation using pix2pixHD',...","The study employs the pix2pixHD model, a deep ...","[""Pearson's correlation coefficient (solar min...",['Dependence on the quality and resolution of ...,4,['Significant reduction in computation time co...,['Limited by the resolution and quality of inp...,['Potential for integration with other space w...,8,"[0.99, 0.98, 0.08, 0.085]"
7,Solar farside magnetograms from deep learning ...,"Taeyoung Kim, Eunsu Park, Harim Lee, Yong-Jae ...",10.1038/s41550-019-0719-9,"['SDO/AIA 304-Å images (2011-2017, excluding S...",https://github.com/tykimos/SolarMagGAN,"['NumPy', 'Keras']",Generation of farside solar magnetograms,Solar Physics,The research presents a deep learning model ba...,['Conditional Generative Adversarial Networks ...,...,"['Image-to-image translation using cGANs', 'Tr...",The study uses a cGAN-based deep learning mode...,['Correlation Coefficient (CC) of Total Unsign...,['The model may not generalize well to odd sol...,4,['High correlation between generated and real ...,['Limited accuracy in quiet regions compared t...,['The model can be adapted for other image-to-...,8,"[0.97, 0.77, 0.067, 0.053]"
8,Three-day Forecasting of Solar Wind Speed Usin...,"Jihyeon Son, Suk-Kyung Sung, Yong-Jae Moon, Ha...",10.3847/1538-4357/ace042,"['SDO/AIA 211 and 193 Å EUV images', 'OMNIWeb ...",,"['SolarSoft library', 'TensorFlow/PyTorch (imp...",Solar wind speed prediction,"Solar wind, Space weather forecasting",This study presents a deep-learning model to f...,['Convolutional Neural Network (CNN) with Ince...,...,"['Deep learning with multimodal data fusion', ...",The study uses a hybrid deep-learning model co...,"['RMSE (6-hour prediction)', 'unit', 'RMSE (72...",['Struggles to predict solar wind speed enhanc...,4,['High accuracy in predicting solar wind speed...,['Limited ability to predict CME-induced solar...,['Potential for real-time space weather foreca...,8,"[37.4, 'km/s', 68.2, 'km/s', 0.92, 'dimensionl..."
9,Generation of High-resolution Solar Pseudo-mag...,"Gyungin Shin, Yong-Jae Moon, Eunsu Park, Hyun-...",10.3847/2041-8213/ab953d,['Ca II K 393.3 nm spectroheliograms from the ...,https://github.com/NoelShin/Ca2Mag,"['PIL', 'NumPy', 'PyTorch', 'scikit-image']",Generation of high-resolution pseudo-magnetogr...,Solar Physics,The paper presents a deep learning model based...,['pix2pixHD'],...,['Deep learning-based image-to-image translati...,"The study uses the pix2pixHD model, an advance...",['Total Unsigned Magnetic Flux CC (Full Disk)'...,['Struggles to accurately reconstruct complex ...,5,['High correlation between generated and real ...,['Difficulty in accurately reconstructing comp...,['Potential application to historical Ca II K ...,9,"[0.99, 0.74, 0.99, 0.81, 0.95, 0.24, 0.93, 0.9..."


In [18]:
# Typed dictionary describing the papers linked to a single author.
# NOTE(review): presumably meant as a structured-output schema for the LLM (the
# Annotated metadata follows the `(type, default, description)` layout) — confirm
# against where it is used. Comments are used instead of a docstring on purpose,
# since a docstring may become part of the schema description.
class AuthorRelation(TypedDict):
    # `wrote`: list of strings; `...` marks the field as having no default, and the
    # trailing string is the human-readable description of the field.
    wrote: Annotated[List[str], ..., 'Papers author wrote or contributed to']

In [40]:
# Flatten the comma-separated `author` column into one list of individual names.
# Order and duplicates are preserved (an author appears once per paper).
# NOTE(review): if `authors` drives one LLM call per entry, de-duplicate it
# downstream, e.g. `list(dict.fromkeys(authors))`.
authors = []
for author_field in entities_df['author']:
    # Skip missing cells (pandas reads them as float NaN, which has no `.split`).
    if not isinstance(author_field, str):
        continue
    # Treat the conjunction " and " as another separator so "A and B" is split,
    # then split on commas.
    for piece in author_field.replace(' and ', ', ').split(','):
        name = piece.strip()
        # Only strip "and" when it is a leading word ("and C"). The previous
        # `replace('and', '')` also mangled names that merely contain the
        # letters, e.g. "Alexander" -> "Alexer".
        if name.startswith('and '):
            name = name[len('and '):].strip()
        if name:
            authors.append(name)

In [74]:
# Convert every row of the cleaned frame into a {column_name: value} record.
#
# The records are built directly from `entities_df`. The previous loop took column
# positions from `entities_df` but read the values from `df`, which still contains
# `metrics_list` before `scores`; as a result the 'scores' key received the
# `metrics_list` values and 'code_link' received the uncleaned raw value.
entities = entities_df.to_dict(orient='records')

In [78]:
# System instruction for the author -> papers extraction, with the full list of
# entity records rendered inline via its string representation.
# NOTE: that representation contains literal '{' and '}' (each record is a dict),
# which matters if this text is later used as a format-style template.
system_prompt = (
    "Extract the papers the given author wrote or contributed to from the dataset given.\n"
    "\n"
    f"Dataset: {entities}"
)

In [79]:
# Build the chat prompt: a fixed system message carrying the dataset, plus a human
# message with a single `{author}` template variable.
#
# Two fixes compared with the previous cell:
#   * `from_messages` is now *called*. The earlier `from_messages=([...])` was an
#     assignment that replaced the classmethod with a plain list and bound `prompt`
#     to that list. If that cell was already run in this kernel, restart the kernel
#     (or re-import ChatPromptTemplate) to restore the real method.
#   * Literal braces in the system text (the dataset is rendered as dicts) are
#     doubled so the template engine treats them as text, leaving `{author}` as
#     the only input variable.
escaped_system_prompt = system_prompt.replace('{', '{{').replace('}', '}}')

prompt = ChatPromptTemplate.from_messages([
    ('system', escaped_system_prompt),
    ('human', 'Author: {author}'),
])

In [None]:
class KB:
    """In-memory container holding entities and the relations between them."""

    def __init__(self) -> None:
        """Start with an empty entity mapping and an empty relation list."""
        # Each instance gets its own fresh containers.
        self.entities: Dict = dict()
        self.relations: List = list()

        