In [1]:
%%capture 
!pip install h2o

In [2]:
import pandas as pd
import matplotlib as plt
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os

# 07_1_LEARN-DataAnalysisML

<a target="_blank" href="https://colab.research.google.com/github/hiyama341/ConStrain/blob/main/colab_notebooks/07_1_LEARN_DataAnalysis.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

We can import the data repositoru like this: 

In [3]:
# cloning the data repo
!git clone https://github.com/hiyama341/ConStrain_on_google_colab.git

fatal: destination path 'ConStrain_on_google_colab' already exists and is not an empty directory.


## 0. Intro
In this study, we aim to use machine learning techniques to predict the best promoter-gene combinations. Machine learning is a powerful tool that allows us to analyze large and complex datasets, identify patterns and make predictions. We will use various machine learning algorithms through the package H2O. 

The machine learning models will be trained on the data from the experiments from[06_1_TEST_LibraryCharacterisation](../colab_notebooks/06_1_TEST_LibraryCharacterisation.ipynb), and will learn to predict the best promoter-gene combination based on the observed phenotype and genotype. This will enable us to identify the combination of genes and promoters that result in the highest level of expression or activity, without the need for additional experimentation.

Ultimately, the use of machine learning to predict the best promoter-gene combination will greatly improve the efficiency and allowing us to identify the best combination in a shorter time and with fewer resources.



In this notebook we continue the workflow by using Machine Learning to predict the best promoter:gene combinations of the remaining library that was generated in [05_1_BUILD_CombinatorialLibrary_AllStrain](../colab_notebooks/05_1_BUILD_CombinatorialLibrary_AllStrains.ipynb) and analyzed in [06_1_TEST_LibraryCharacterisation](../colab_notebooks/06_1_TEST_LibraryCharacterisation.ipynb). 

## Project overview - Use ML to predicte best promoter:gene combinations 

**Hypothesis**
1. Specific combinations of CPR / G8H homologs and corresponding expression levels can remove the G8H bottleneck in the Strictosidine pathway

Aim: To test the hypothesis

Tasks
1. dgRNA
2. Base strain
3. Library
4. Phenotyping
**5. Machine Learning**
   - Predict the best promoter:gene combinations


## 1. Setting up the AutoML library

In [4]:
import h2o
from h2o.automl import H2OAutoML

In [5]:
# If this doesnt work - install java. 
# Start the H2O cluster (locally)
h2o.init(ip="localhost", min_mem_size_GB=8)

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
  Java Version: java version "1.8.0_361"; Java(TM) SE Runtime Environment (build 1.8.0_361-b09); Java HotSpot(TM) 64-Bit Server VM (build 25.361-b09, mixed mode)
  Starting server from /Users/lucaslevassor/opt/anaconda3/envs/constrain/lib/python3.8/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /var/folders/2f/lw3sfzbs7l7f_q1knzbtbwrr0000gp/T/tmpknkv2kwn
  JVM stdout: /var/folders/2f/lw3sfzbs7l7f_q1knzbtbwrr0000gp/T/tmpknkv2kwn/h2o_lucaslevassor_started_from_python.out
  JVM stderr: /var/folders/2f/lw3sfzbs7l7f_q1knzbtbwrr0000gp/T/tmpknkv2kwn/h2o_lucaslevassor_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,21 secs
H2O_cluster_timezone:,Europe/Copenhagen
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.36.1.3
H2O_cluster_version_age:,6 months and 18 days !!!
H2O_cluster_name:,H2O_from_python_lucaslevassor_iv10f1
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,7.667 Gb
H2O_cluster_total_cores:,0
H2O_cluster_allowed_cores:,0


In [6]:
#h2o.shutdown()

### 1.1 Import out dataframe to h20 object

Train a model based on the target and the df

In [7]:
# Run ML with new input 
new_input_for_ml = pd.read_csv('ConStrain_on_google_colab/data/09-AutoML/input_to_ml/first_round/input_for_ml_1st_iteration_all_analytics.csv')
new_input_for_ml

Unnamed: 0,Line_name,0,1,2,3,Amt_norm
0,yp49_A01,1,2,5,1,0.922793
1,yp49_C01,1,2,5,9,0.509123
2,yp49_D01,2,4,7,3,0.166871
3,yp49_E01,2,1,5,7,0.327489
4,yp49_F01,3,3,6,1,25.060934
...,...,...,...,...,...,...
162,yp51_C12,8,3,7,2,0.000000
163,yp51_D12,8,2,7,6,0.591185
164,yp51_E12,8,1,6,10,0.448644
165,yp50_F05,6,1,8,2,13.391244


In [8]:
new_input_for_ml['default_rank'] = new_input_for_ml['Amt_norm'].rank()
new_input_for_ml['max_rank'] = new_input_for_ml['Amt_norm'].rank(method='max')
new_input_for_ml['NA_bottom'] = new_input_for_ml['Amt_norm'].rank(na_option='bottom')
new_input_for_ml['pct_rank'] = new_input_for_ml['Amt_norm'].rank(pct=True)
new_input_for_ml = new_input_for_ml.sort_values(by= 'max_rank', ascending = False)

In [9]:
new_input_for_ml

Unnamed: 0,Line_name,0,1,2,3,Amt_norm,default_rank,max_rank,NA_bottom,pct_rank
73,yp50_D03,5,2,6,3,245.034275,167.0,167.0,167.0,1.000000
75,yp50_G03,6,2,6,5,156.327285,166.0,166.0,166.0,0.994012
86,yp50_E05,5,1,6,8,144.340857,165.0,165.0,165.0,0.988024
126,yp51_A02,7,2,6,1,135.693498,164.0,164.0,164.0,0.982036
106,yp50_A09,4,2,6,3,134.321746,163.0,163.0,163.0,0.976048
...,...,...,...,...,...,...,...,...,...,...
60,yp50_E01,5,3,5,7,0.000000,30.0,59.0,30.0,0.179641
114,yp50_D11,5,4,8,6,0.000000,30.0,59.0,30.0,0.179641
24,yp49_E05,2,3,7,8,0.000000,30.0,59.0,30.0,0.179641
27,yp49_H05,3,3,7,10,0.000000,30.0,59.0,30.0,0.179641


In [10]:
# Choosing which coloums to train on 
new_input_for_ml = new_input_for_ml[['Line_name','0','1','2','3','Amt_norm']]
new_input_for_ml.columns = ['Line_name','0','1','2','3','Amt_norm']

In [11]:
df_test = h2o.H2OFrame(pd.concat([new_input_for_ml], axis='columns'))
df_test.describe()

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Rows:167
Cols:6




Unnamed: 0,Line_name,0,1,2,3,Amt_norm
type,string,int,int,int,int,real
mins,,1.0,1.0,5.0,1.0,0.0
mean,,4.580838323353294,2.682634730538923,6.562874251497007,5.305389221556885,15.546391286319041
maxs,,8.0,4.0,8.0,10.0,245.03427469317091
sigma,,2.253055064641254,1.1621014428030096,1.1696200092422187,2.836323292129,36.58610063884372
zeros,0,0,0,0,0,59
missing,0,0,0,0,0,0
0,yp50_D03,5.0,2.0,6.0,3.0,245.03427469317091
1,yp50_G03,6.0,2.0,6.0,5.0,156.3272849244192
2,yp50_E05,5.0,1.0,6.0,8.0,144.34085698560642


In [12]:
# Since the data is categorical we need to make it from numerical to categorical 
df_test['0']= df_test['0'].asfactor()
df_test['1']= df_test['1'].asfactor()
df_test['2'] = df_test['2'].asfactor()
df_test['3'] = df_test['3'].asfactor()

In [13]:
df_test.describe()

Rows:167
Cols:6




Unnamed: 0,Line_name,0,1,2,3,Amt_norm
type,string,enum,enum,enum,enum,real
mins,,,,,,0.0
mean,,,,,,15.546391286319041
maxs,,,,,,245.03427469317091
sigma,,,,,,36.58610063884372
zeros,0,,,,,59
missing,0,0,0,0,0,0
0,yp50_D03,5,2,6,3,245.03427469317091
1,yp50_G03,6,2,6,5,156.3272849244192
2,yp50_E05,5,1,6,8,144.34085698560642


Set some arguments for the autoML. 


Important here is that we dont split the dataset but rather keep the cross-validation validate a model internally, i.e., estimate the model performance without having to sacrifice a validation split. See: https://docs.h2o.ai/h2o/latest-stable/h2o-docs/cross-validation.html


## NOTE: IF you don't want to run the autoML skip the next few paragraphs and go to 4

In [14]:
asdfasdfa

NameError: name 'asdfasdfa' is not defined

## 2. RUN automl

Here we are defining the autoML object and after we can train it

In [None]:
# Select the columns we want to train on
feature_cols = ['0', '1', '2', '3']

# Initialize H2O autoML class
AutoML = H2OAutoML(
    max_runtime_secs=0,  # 1 hour =int(3600 * 1) , if unlimited time is wanted then set this to zero = 0
    max_models=None,  # None =  no limit
    nfolds=10,         # number of folds for k-fold cross-validation (nfolds=0 disables cross-validation)
    seed=1,            # Reproducibility
#    exclude_algos = ["StackedEnsemble"],
    sort_metric = "MAE",
    keep_cross_validation_predictions=True 
)

Train a model based on the target and the df. 


It has been Commented out and saved as showed below

In [None]:
%%time

AutoML.train(
     x=feature_cols,
     y='Amt_norm',
     training_frame=df_test,
 ) 

In [None]:
print('DOOOOOOONE')

## 3. Processing model


In [None]:
## Saving the leaderboard ids
leaderboard = AutoML.leaderboard
model_ids = list(leaderboard['model_id'].as_data_frame().iloc[:,0])


### 3.1 Saving the best model

In [None]:
# how to save any model
out_path = 'ConStrain_on_google_colab/data/09-AutoML/best_models/first_round/'
mdl = h2o.get_model(model_ids[0])
h2o.save_model(model=mdl, path=out_path, force=True)


### 3.2 Saving the leaderboard

In [None]:
# saving the leaderboard
out_path = 'ConStrain_on_google_colab/data/09-AutoML/leaderboards/first_round/'
h2o.export_file(leaderboard, os.path.join(out_path, 'aml_leaderboard.h2o'), force=True)

In [None]:
# saving the models
#leaderboard = AutoML.leaderboard
#lb= leaderboard
#model_ids = list(leaderboard['model_id'].as_data_frame().iloc[:,0])
#
#out_path = 'ConStrain_on_google_colab/data/09-AutoML/leaderboards/first_round/'
#for m_id in model_ids:
#     mdl = h2o.get_model(m_id)
#     h2o.save_model(model=mdl, path=out_path, force=True)

In [None]:
len(model_ids)

Now we want to extract the best models

In [None]:
df_from_h2o_object = leaderboard.as_data_frame(use_pandas=True, header=True)
df_from_h2o_object

In [None]:
best_model = AutoML.get_best_model()
best_model

### 3.4 saving the CV-holdout predictions 


In [None]:
# Get the predictions
cross_validation_holdout_predictions = best_model.cross_validation_holdout_predictions()

# make the df_test to a dataframe
as_data_frame_df_test = df_test.as_data_frame()

# Make cv_pred to a dataframe
as_data_frame_CV_predictions = cross_validation_holdout_predictions.as_data_frame()
as_data_frame_CV_predictions.columns = ['cv_holdout_predictions']
as_data_frame_CV_predictions

In [None]:
# merge with the test dataframe
df3 = pd.merge(as_data_frame_df_test, as_data_frame_CV_predictions, left_index=True, right_index=True)
df3 = df3.sort_values(by= 'Amt_norm', ascending= False)
df3 = df3.reset_index()
df3['ranked_pct'] = df3['Amt_norm'].rank(pct= True)
df3['cv_holdout_predictions_ranked_pct'] = df3['cv_holdout_predictions'].rank(pct= True)
df3

In [None]:
# save it to the folder
df3.to_csv('ConStrain_on_google_colab/data/09-AutoML/cv_holdout_predictions/first_round/cv_holdout_predictions.csv')

## 4. Importing the best model

### 4.1 Importing the leaderboard

In [None]:
models_path = "ConStrain_on_google_colab/data/09-AutoML/leaderboards/first_round/"

lb = h2o.import_file(path=os.path.join(models_path, "aml_leaderboard.h2o"))
lb

In [None]:
best_model = h2o.load_model("ConStrain_on_google_colab/data/09-AutoML/best_models/first_round/"+lb[0,0]) 
my_local_model = h2o.download_model(best_model, path="ConStrain_on_google_colab/data/09-AutoML/best_models/first_round/")
uploaded_model = h2o.upload_model(my_local_model)

In [None]:
best_model

### 4.1 Investigate the best model - Cross-validation holdout predictions

Get the cross validation predictions

In [None]:
df3 = pd.read_csv('ConStrain_on_google_colab/data/09-AutoML/cv_holdout_predictions/second_round/cv_holdout_predictions.csv')

In [None]:
#cross_validation_holdout_predictions = best_model.predict(df_test)

In [None]:
# This is where list of cv preds are stored (one element per fold):
#cross_validation_holdout_predictions = best_model.cross_validation_holdout_predictions()

In [None]:
#cross_validation_holdout_predictions.frame_id

In [None]:
#cv_fram = cross_validation_holdout_predictions.as_data_frame()
#cv_fram

In [None]:
# However you most likely want a single-column frame including all cv preds
#cross_validation_predictions = best_model.cross_validation_predictions()
#print(len(cross_validation_predictions))

In [None]:
#as_data_frame_df_test = df_test.as_data_frame()
#as_data_frame_CV_predictions = cross_validation_holdout_predictions.as_data_frame()

In [None]:
df3 = pd.merge(as_data_frame_df_test, as_data_frame_CV_predictions, left_index=True, right_index=True).sort_values(by= 'Amt_norm', ascending = False).reset_index()
df3.head(10)

#### Barplot of production vs prediction 

In [None]:
from constrain.plotting.plotting import bar_plot_w_hue

In [None]:
%%capture
# initialize
ds1 = df3[['Line_name', 'Amt_norm']]
ds1.columns = ['Line_name', 'strict']
ds2 = df3[['Line_name', 'cv_holdout_predictions']]
ds2.columns = ['Line_name', 'strict']

# add category
ds2['category'] = 'Predicted'
ds1['category'] = 'Strictosidine'
dss = pd.concat([ds1, ds2])

In [None]:
bar_plot_w_hue

In [None]:
bar_plot_w_hue(dss, 'Line_name', 'strict', 
               path = 'ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/Prediction of the sampled library_ranked',
               palette = 'dark',
              size_height= 10, 
              size_length = 20)

#### Ranking production and correlation plots 

In [None]:
df3['norm_strict_measured_rank_pct']= df3['Amt_norm'].rank(pct=True)
df3['Predicted_strict_production_rank_pct'] = df3['cv_holdout_predictions'].rank(pct=True)
df3.to_csv('ConStrain_on_google_colab/data/09-AutoML/all_predictions/first_round/input_for_ml_1st_iteration_w_predictions_and_ranking_2701.csv')
df3

In [None]:
from constrain.plotting.plotting import correlation_plot

In [None]:
correlation_plot(df3,"Amt_norm","cv_holdout_predictions", save_pdf = True , 
                 path ='ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/corr_plot_Amt_predict')

In [None]:
correlation_plot(df3,"norm_strict_measured_rank_pct","Predicted_strict_production_rank_pct", save_pdf = True , 
                 path ='ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/corr_plot_rank_rank_predict')

## 5. Clean up data on the remaining library of combinations of genotypes

In [None]:
import numpy as np

In [None]:
all_genotypes = pd.read_csv('ConStrain_on_google_colab/data/03-strain_sequences/systematic_names_of_all_strains/systematic_names_on_all_combinations.csv')
all_genotypes_df = h2o.H2OFrame(pd.concat([all_genotypes], axis='columns'))
all_genotypes_df

In [None]:
input_fixed = new_input_for_ml[['0','1','2','3']]
input_fixed

Getting rid of the genotypes we have already tested:

In [None]:
df_diff = all_genotypes[~all_genotypes.apply(tuple,1).isin(input_fixed.apply(tuple,1))]
df_diff

In [None]:
# make the diff into a df 
all_genotypes_df = h2o.H2OFrame(pd.concat([df_diff], axis='columns'))
all_genotypes_df['0']= all_genotypes_df['0'].asfactor()
all_genotypes_df['1']= all_genotypes_df['1'].asfactor()
all_genotypes_df['2'] = all_genotypes_df['2'].asfactor()
all_genotypes_df['3'] = all_genotypes_df['3'].asfactor()

## 6. Predict the  phenotypes from the whole library

In [None]:
predicted = best_model.predict(all_genotypes_df)

In [None]:
len(predicted)

In [None]:
predicted_norm = predicted.as_data_frame()
predicted_norm.head()

In [None]:
# making predictions into a list
predicted_list = predicted_norm['predict'].values.tolist()

# Adding predictions to our dataframe
df_diff['predicted_norm_strict'] = predicted_list

# Sorting the dataframe
predicted_merged_sorted = df_diff.sort_values('predicted_norm_strict', ascending=False)
predicted_merged_sorted

If we want the names on the genotypes we can do the following: 

In [None]:
g8h_list_of_index = ["Cacu", "Opum","Cro","Vmin","Smus","Rsep","Oeu","Ccal"]
cpr_list_of_index = ["Cro", "Aan","Ara","Clo","Rse","Ahu","Ani","Cac","Oeu","Cpo"]
promoters = ["CYC1", "ENO2","PCK1","RPL15B", "CCW12", "TPI1","MLS1","URE2"]

g8h_genotype = []
cpr_genotype = []

pg8h_genotype = []
pcpr_genotype = []

predicted_list = []
# adding index of genotypes to individual rows 
for index, row in predicted_merged_sorted.iterrows():
    g8h_genotype.append(g8h_list_of_index[int(row['0'])-1])
    cpr_genotype.append(cpr_list_of_index[int(row['3'])-1])
    pg8h_genotype.append(promoters[int(row['1'])-1])
    pcpr_genotype.append(promoters[int(row['2'])-1])
    predicted_list.append(row['predicted_norm_strict'])
    
    
list_of_lists = [g8h_genotype,pg8h_genotype,pcpr_genotype, cpr_genotype, predicted_list  ]

In [None]:
df_with_genotypes = pd.DataFrame(list_of_lists )
df_with_genotypes = df_with_genotypes.T
df_with_genotypes.columns = ['G8H', 'pG8H', 'pCPR', 'CPR', 'Predicted_strict_production']
df_with_genotypes.head(10)

In [None]:
from constrain.plotting.plotting import bar_plot

#### 6.1 Plot of predicted strictosidine production across all promoter:homolog combinations

In [None]:
x = list(df_with_genotypes.index)
y = list(df_with_genotypes['Predicted_strict_production'])

bar_plot(x, y, path = 'ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/barplot_predicting_remaining_best_combinations_2701')

In [None]:
df_with_genotypes

Lets save the dataframes into csv files

In [None]:
# getting a unique name
from datetime import datetime
now = datetime.now() # current date and time
time = now.strftime("%Y_%m_%d_%H:%M_")

name = 'top_ML_predicted_after_first_DBTL_merged_analytics'
path = 'ConStrain_on_google_colab/data/09-AutoML/all_predictions/first_round/'

df_with_genotypes.to_csv(path+time+name+'.csv', index=False)

## 7. Top 20 predicted producers of the unseen library

Lets look at the file generated first from one of the first models. 

In [None]:
df_with_genotypes = pd.read_csv('ConStrain_on_google_colab/data/09-AutoML/all_predictions/first_round/top_ML_predicted_after_first_DBTL_merged_analytics.csv')

In [None]:
df_with_genotypes

Adding unique names 

In [None]:
df_with_genotypes['names'] = df_with_genotypes['G8H']+'-' +df_with_genotypes['pG8H']+ '_'+ df_with_genotypes['pCPR']+'_'+df_with_genotypes['CPR']

In [None]:
from constrain.plotting.plotting import horisontal_bar_plot

In [None]:
# Plotting top 20 strains
x_axis = list(df_with_genotypes['names'])[:20][::-1]
y_axis = list(df_with_genotypes['Predicted_strict_production'])[:20][::-1]

horisontal_bar_plot(x_axis,y_axis, path = 'ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/Top20 predicted strains DBTL1', 
                    title = 'Top20 predicted strains DBTL1', 
                   size_height=5, 
                   size_length=10)

### 7.1 How many genotypes are predicted to produce more than the control strain?

In [None]:
over_100_strict_procduction = df_with_genotypes[df_with_genotypes['Predicted_strict_production'] >= 100 ]
print(f"{len(over_100_strict_procduction)} strains out of {len(df_with_genotypes)} showed production of strictosidine over the normalized value")
print(f"Out of the remaing theoretical space these constittue : {(len(over_100_strict_procduction)/len(df_with_genotypes))*100}")

## 8. Learning curve on partitioned data

In [None]:
from constrain.machine_learning.auto_ml import autoML_on_partitioned_data

In [None]:
shuffled_new_input_for_ml = new_input_for_ml.sample(frac=1, random_state= 2).reset_index(drop=True) # Random state sets a seed on the shuffeling
shuffled_new_input_for_ml

Already did this. It takes +20 hours on my laptop.

In [None]:
#feature_cols = ['0', '1', '2', '3']
#training_column = 'Amt_norm'
#
## Training 3 partitions 5 times - the function is set to nfold = 10  by default. 
#for i in range(0,3): 
#    autoML_on_partitioned_data(feature_cols, training_column,
#                               new_input_for_ml,
#                               training_time=0, 
#                               partitions = 3,
#                               nfold= 10,
#                               path = 'ConStrain_on_google_colab/data/09-AutoML/learning_curve_data/first_round/0sec_experiment/NOT_shuffled_60sec/')

### 8.1 - visualizing learning curve

In [None]:
import os

# folder path
dir_path = 'ConStrain_on_google_colab/data/09-AutoML/learning_curve_data/first_round/nfold_10/0sec_experiment/shuffled_random2/'

# list file and directories
res = os.listdir(dir_path)
res

In [None]:
lst_of_pd_dataframes = []
for ml_partitioned in res:
    if ml_partitioned.endswith('.csv'):
        lst_of_pd_dataframes.append(pd.read_csv(dir_path+ml_partitioned))

Lets retrieve the data 

In [None]:
dfs = [df.set_index('Unnamed: 0') for df in lst_of_pd_dataframes]
concated = pd.concat(dfs, axis=1)
concated

In [None]:
# test results
test_results = concated[['0']]
test_results['mean'] = test_results.mean(axis=1)
test_results['std'] = test_results.std(axis=1)
test_results

In [None]:
# Cross-validated mean MAE
cross_validated_results = concated[['CV_mean_MAE']]
cross_validated_results['mean'] = cross_validated_results.mean(axis=1)
cross_validated_results

In [None]:
# Cross-validated mean SD
cross_validated_results_sd = concated[['CV_SD_MAE']]
cross_validated_results_sd['mean'] = cross_validated_results_sd.mean(axis=1)
cross_validated_results_sd

In [None]:
from constrain.plotting.plotting import plot_ml_learning_curve

In [None]:
# CV
x = list(cross_validated_results.index)
y = list(cross_validated_results['mean'])
sd =  list(cross_validated_results_sd['mean'])

# model_performance
y1 = list(test_results['mean'])
sd1 = np.array(list(test_results['std']))


plot_ml_learning_curve(x, y1, y,sd1,  sd, save_pdf = True ,
                       path = 'ConStrain_on_google_colab/data/10-plots/07_1_LEARN_DataAnalysis/Learning_curve_on_partitioned_data_DBTL1_12_01_2023_10_height_10_length', 
                            size_height = 10,
                             size_length = 10)