In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import pandas as pd

### Predict ExE, ExN, and NxN interactions separately with random forests, for comparison with the matrix predictions

In [3]:
# Load the three interaction tables in one pass:
#   ee = essential x essential, en = essential x nonessential,
#   nn = nonessential x nonessential.
ee, en, nn = map(
    pd.read_csv,
    (
        '../extracted_data/interaction_table_ee.csv',
        '../extracted_data/interaction_table_en.csv',
        '../extracted_data/interaction_table_nn.csv',
    ),
)

In [4]:
def train_and_report(data, test_size=0.2, random_state=42):
    """Fit a random forest on label-encoded gene pairs and print hold-out metrics.

    Each row of ``data`` is one (query gene, array gene) pair with its ``dmf``
    value. Both gene identifiers are converted to integer codes, a
    ``RandomForestRegressor`` is trained on a random subset of the rows, and
    R² and RMSE on the held-out rows are printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'query_gene'``, ``'array_gene'`` and ``'dmf'``.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation (forwarded to
        ``train_test_split``).
    random_state : int or None, default 42
        Seed used for BOTH the train/test split and the forest, so that the
        printed metrics are reproducible across kernel restarts. Pass ``None``
        to get a fresh, unseeded run.

    Returns
    -------
    None
        The metrics are printed, not returned.

    Notes
    -----
    The split is over rows, not genes: the same gene can appear in both the
    training and the test rows.
    """
    # Separate encoders: query and array genes are coded independently, so the
    # same gene may receive different integer codes in the two columns.
    query_codes = LabelEncoder().fit_transform(data['query_gene'])
    array_codes = LabelEncoder().fit_transform(data['array_gene'])

    X = np.column_stack([query_codes, array_codes])
    y = data['dmf'].values

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Seed the forest too; without it, bootstrap sampling and feature selection
    # change the scores on every run even though the split is fixed.
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"R² score: {r2:.3f}")
    print(f"RMSE: {rmse:.3f}")

## Essential x Essential Gene Interactions

In [16]:
# Train and score on the essential x essential table; metrics are printed below.
train_and_report(ee)

R² score: 0.377
RMSE: 0.147


## Essential x Nonessential Gene Interactions

In [17]:
# Train and score on the essential x nonessential table; metrics are printed below.
train_and_report(en)

R² score: 0.473
RMSE: 0.133


## Nonessential x Nonessential Gene Interactions

In [5]:
# Train and score on the nonessential x nonessential table; metrics are printed below.
train_and_report(nn)

: 