# Create synthetic well data from gempy model

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
import pandas as pd
import random

from sklearn.datasets.samples_generator import make_blobs

### Safe GemPy model

In [2]:
# extract gempy_model with coordinates and safe as file.csv
gempy_model = {'layer': np.flipud(clf.lith_block[0]).astype(int), 'X': np.flipud(clf.geo_data.grid.values[:,0]), 
               'Y': np.flipud(clf.geo_data.grid.values[:,1]),'Z': np.flipud(clf.geo_data.grid.values[:,2])}
lith_block = pd.DataFrame(gempy_model)

lith_block.to_csv('../data/gempy_model_3layers_tilted.csv', index= False)

NameError: name 'clf' is not defined

### Create random synthetic well logs from GemPy model

In [3]:
def create_syn_well_data(well_model, n_boreholes ,n_features, form_std):
    
    # well_model.layer = np.asarray(well_model.layer) - min(well_model.layer)
    gp_res = well_model.gp_res.loc[0]
    len_boreholes = int(gp_res - len(np.where(well_model.layer < 0)[0])/(gp_res**2))
    well_model = well_model.drop('gp_res',axis=1)
    layer = list(set(well_model.layer))
    n_layers = len(layer)-1
    layer = layer[:n_layers]
    
    # drop basement values
    ind = []
    for i in range(len(layer)):
        ind.append(list(np.where(well_model.layer == layer[i])[0]))
    ind_flat = [item for sublist in ind for item in sublist]
    well_model = well_model.loc[ind_flat].sort_index()
    well_model = well_model.reset_index(drop=True)
    
    # extract random wells from the Gempy model
    BH_total = pd.DataFrame()
    for i in range(0,n_boreholes):
        a = int(len_boreholes* random.randint(0, int(len(well_model)/len_boreholes)-1))
        BH = well_model.loc[a : a + len_boreholes-1]
        BH['Well Name'] = 'BH{0}'.format(i+1) 
        BH_total = pd.concat([BH,BH_total]) 
    BH_total = BH_total.reset_index(drop=True)
    
    length = []
    for i in range(n_layers):
        length.append(len(np.where(BH_total.layer == layer[i])[0]))
        
    X, y = make_blobs(n_samples = max(length) * n_layers, n_features = n_features, 
                  centers = n_layers,
                      random_state=42, cluster_std=form_std)
    
    for j in range(n_features):
        for i in range(n_layers):
            BH_total.loc[np.where(BH_total.layer == layer[i])[0],'feature{0}'.format(j+1)] = (
                    X[np.where(y ==layer[i])[0],j][:len(np.where(BH_total.layer == layer[i])[0])] )   

    BH_total = np.round(BH_total,2)
    return BH_total, well_model

In [4]:
# load GemPy model from file.csv
gempy_model = pd.read_csv('../data/gempy_model_4layers_90deg.csv')

# create random synthetic well logs
well_data2, well_model2 = create_syn_well_data(gempy_model, n_boreholes = 5, 
                                            n_features = 4, form_std = 6)

# safe well logs as file.csv
well_data2.to_csv('../data/test.csv', index = False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
