# 🔬 Predição de Ponto de Fusão com Descritores Moleculares

Este notebook aplica aprendizado de máquina para prever o ponto de fusão a partir de estruturas moleculares representadas em SMILES.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from rdkit import Chem
from rdkit.Chem import Descriptors

In [None]:
# Load the example dataset from a path relative to the working directory.
df = pd.read_csv(filepath_or_buffer="../data/exemplo_dataset.csv")
# Bare expression: as the last statement of the cell it makes the notebook
# render the table.
df

In [None]:
# Função para gerar descritores
def gerar_descritores(smiles_list):
    """Compute a small set of RDKit descriptors for each parsable SMILES.

    Parameters
    ----------
    smiles_list : iterable of str
        SMILES strings. When a ``pandas.Series`` is given, its index labels
        are kept for the rows that parse; for any other iterable the
        position of each entry is used as its label.

    Returns
    -------
    pandas.DataFrame
        One row per successfully parsed molecule, with the columns
        ``MolWt``, ``MolLogP``, ``NumRotatableBonds``, ``TPSA``,
        ``NumHDonors`` and ``NumHAcceptors``. Entries that are not strings
        or that ``Chem.MolFromSmiles`` cannot parse are left out, and their
        labels are absent from the index, so the result can be aligned with
        the source data through ``.index``. An empty input yields an empty
        frame that still carries the six columns.
    """
    colunas = [
        'MolWt',
        'MolLogP',
        'NumRotatableBonds',
        'TPSA',
        'NumHDonors',
        'NumHAcceptors',
    ]

    # Keep the caller's labels. Rebuilding a 0..k-1 index after dropping
    # invalid rows would make ``df.loc[X.index, ...]`` select the wrong rows.
    if isinstance(smiles_list, pd.Series):
        pares = smiles_list.items()
    else:
        pares = enumerate(smiles_list)

    rotulos = []
    linhas = []
    for rotulo, smi in pares:
        # Non-string entries (e.g. missing values read from a CSV) are
        # skipped instead of being handed to the SMILES parser.
        mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
        if mol is None:
            continue
        rotulos.append(rotulo)
        linhas.append({
            'MolWt': Descriptors.MolWt(mol),
            'MolLogP': Descriptors.MolLogP(mol),
            'NumRotatableBonds': Descriptors.NumRotatableBonds(mol),
            'TPSA': Descriptors.TPSA(mol),
            'NumHDonors': Descriptors.NumHDonors(mol),
            'NumHAcceptors': Descriptors.NumHAcceptors(mol),
        })

    return pd.DataFrame(linhas, index=rotulos, columns=colunas)

# Descriptor table for the SMILES column of the dataset.
X = gerar_descritores(df['SMILES'])
# Target values looked up at the index labels of X.
# NOTE(review): this is only correct if X's index labels refer to the
# matching rows of df -- confirm against gerar_descritores.
y = df['Ponto_Fusao'].loc[X.index]

## 🔍 Análise Exploratória

In [None]:
# Pairwise plots of every descriptor together with the target column.
sns.pairplot(pd.concat([X, y], axis=1))
# Title text repaired from mis-encoded characters; y=1.02 puts it just above
# the top edge of the figure.
plt.suptitle("Distribuição dos Descritores e Ponto de Fusão", y=1.02)
plt.show()

## 🤖 Treinamento do Modelo

In [None]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# fit() returns the estimator itself, so construction and training are chained.
modelo = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
# Predictions for the held-out samples.
y_pred = modelo.predict(X_test)

In [None]:
# Evaluate the predictions on the held-out set.
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse:.2f}")
# Label repaired from mis-encoded characters so the output reads "R²".
print(f"R²: {r2:.2f}")

## 📊 Visualização dos Resultados

In [None]:
# Predicted vs. actual values for the held-out samples.
plt.figure(figsize=(6,6))
plt.scatter(y_test, y_pred, color='blue')
# Dashed red identity line spanning the range of the full target column;
# points on it are perfect predictions.
plt.plot([y.min(), y.max()], [y.min(), y.max()], '--r')
plt.xlabel("Valor Real")
plt.ylabel("Valor Predito")
# Title text repaired from mis-encoded characters.
plt.title("Predição de Ponto de Fusão")
plt.grid(True)
plt.show()