In [1]:
import pandas as pd
import plotly.plotly as pyl
import plotly.graph_objs as go
import matplotlib.pyplot as plt

%matplotlib inline 

from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly to offline notebook rendering so that iplot() draws figures inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN, so the
# figures need internet access when the notebook is opened -- confirm.
init_notebook_mode(connected=True)

In [2]:
# Input tables, in the order they are unpacked below: logs, well locations, tops.
parquet_paths = (
    "../data/logs.parquet",
    "../data/loc.parquet",
    "../data/tops.parquet",
)
df_logs, df_loc, df_tops = map(pd.read_parquet, parquet_paths)

In [3]:
# Preview the log table: one row per sample, with the well id (wellName),
# the sample DEPTH and the GR reading.
df_logs

Unnamed: 0,wellName,DEPTH,GR
1,512320698.0,421.0,96.9752
2,512320698.0,421.5,102.3324
3,512320698.0,422.0,104.6432
4,512320698.0,422.5,99.2187
5,512320698.0,423.0,92.0200
...,...,...,...
9650828,512336327.0,7187.5,780.6628
9650829,512336327.0,7188.0,798.1260
9650830,512336327.0,7188.5,808.1805
9650831,512336327.0,7189.0,810.6509


In [4]:
# Preview the well locations: Latitude / Longitude indexed by wellName.
df_loc

Unnamed: 0_level_0,Latitude,Longitude
wellName,Unnamed: 1_level_1,Unnamed: 2_level_1
512320698.0,48.673353,2.145178
512320699.0,48.673353,2.150808
512320928.0,48.698713,2.140728
512320970.0,48.713273,2.150148
512320971.0,48.709603,2.145308
...,...,...
512334042.0,48.655583,2.221548
512334043.0,48.655623,2.221548
512335135.0,48.651423,2.216608
512335370.0,48.836473,2.121858


In [5]:
# Preview the formation tops: one depth column per top (MARCEL, CONRAD, SYLVAIN),
# indexed by wellName; a top can be missing for a given well.
df_tops

Unnamed: 0_level_0,MARCEL,CONRAD,SYLVAIN
wellName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
512320698.0,6398.0,6661.0,6638.0
512320699.0,6407.0,6676.0,6656.0
512320928.0,6520.0,6793.0,6746.0
512320970.0,6544.0,6816.0,6770.0
512320971.0,6510.0,6789.0,6742.0
...,...,...,...
512334042.0,6264.0,6524.0,6500.0
512334043.0,6259.0,6526.0,6502.0
512335135.0,6684.0,6938.0,6914.0
512335370.0,6619.0,7086.0,


In [6]:
##################################
# CLEANING
##################################
# Keep only samples whose GR reading is non-negative; a missing GR compares
# False and is therefore dropped as well.
df_logs = df_logs.loc[df_logs['GR'].ge(0)]

# One row per well present in BOTH tables: location columns followed by tops.
well_data = pd.merge(
    df_loc,
    df_tops,
    how='inner',
    left_index=True,
    right_index=True,
)
well_data

Unnamed: 0_level_0,Latitude,Longitude,MARCEL,CONRAD,SYLVAIN
wellName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
512320698.0,48.673353,2.145178,6398.0,6661.0,6638.0
512320699.0,48.673353,2.150808,6407.0,6676.0,6656.0
512320928.0,48.698713,2.140728,6520.0,6793.0,6746.0
512320970.0,48.713273,2.150148,6544.0,6816.0,6770.0
512320971.0,48.709603,2.145308,6510.0,6789.0,6742.0
...,...,...,...,...,...
512334042.0,48.655583,2.221548,6264.0,6524.0,6500.0
512334043.0,48.655623,2.221548,6259.0,6526.0,6502.0
512335135.0,48.651423,2.216608,6684.0,6938.0,6914.0
512335370.0,48.836473,2.121858,6619.0,7086.0,


In [7]:
# Map of the wells, coloured by the depth of the selected formation top.
top = "SYLVAIN"

# NOTE: this adds the top as a new column of df_loc (aligned on wellName);
# df_loc is modified in place for every following cell.
df_loc[top] = df_tops[top]
data = [ dict(
        type = 'scattergeo',
        lon = df_loc['Longitude'],
        lat = df_loc['Latitude'],
        # hover text: the depth of the top at each well
        text = df_loc[top],
        mode = 'markers',
        marker = dict(
            color = df_loc[top],
            # The colour scale is a property of the marker, not of the layout:
            # the former `colorbar = True` layout entry was silently ignored.
            showscale = True,
        )
        )]

layout = dict(
        title = '{0} Depth information<br>(Hover for depth info)'.format(top),
        geo = dict(
            scope='europe',
            projection=dict( type='natural earth',scale=100 ),
            # centre the view on the mean well position
            center = dict( lon=df_loc["Longitude"].mean(), lat = df_loc["Latitude"].mean()),
            showland = True,
            landcolor = "rgb(250, 250, 250)",
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 1.5,
            subunitwidth = 0.5
        ),
    )

fig = dict( data=data, layout=layout )
iplot( fig, validate=False, filename='niobrara' )

In [8]:
# Formation top highlighted in the per-well log plots.
top = "SYLVAIN"

# Ids of all wells listed in the tops table.
wellist = df_tops.index.tolist()

# Shared layout for the per-well figures: a short strip with thin margins.
layout = go.Layout(
    margin=go.layout.Margin(l=50, r=50, b=20, t=0, pad=4),
    height=120,
)

def plot_pred(wellname,top_name,wsize):
    """Plot the GR log of one well in a window around one formation top.

    Reads the notebook-level ``df_logs``, ``df_tops`` and ``layout`` and
    renders the figure with ``iplot``; nothing is returned.

    Args:
        wellname: well id, matched against the ``wellName`` column of
            ``df_logs`` and against the index of ``df_tops``.
        top_name: column of ``df_tops`` holding the depth of the top.
        wsize: half-width of the plotted window, expressed in row labels of
            ``df_logs`` (assumes integer, increasing row labels -- TODO confirm).

    Nothing is plotted when the well has no log rows or when the top depth is
    missing or not positive.

    Raises:
        KeyError: if ``wellname`` is not in ``df_tops`` or ``top_name`` is not
            one of its columns.
    """
    # wellName is a regular column of df_logs: select this well's samples
    df_temp = df_logs[df_logs["wellName"]==wellname]
    if df_temp.empty:
        # no log rows for this well (possibly all removed by cleaning)
        return

    # top depth; a missing top is NaN, and NaN > 0 is False
    true_top = df_tops.loc[wellname][top_name]
    if not true_top > 0:
        return

    # Row label of the sample closest to the top. When a sample sits exactly
    # at the top depth this is that sample (first one on ties); otherwise the
    # nearest sample is used instead of failing on an empty exact match.
    ctr = (df_temp["DEPTH"] - true_top).abs().idxmin()

    # the range to be visualized, clipped to the rows that exist for this well
    first_label = max(df_temp.index[0], ctr - wsize)
    last_label = min(df_temp.index[-1], ctr + wsize)
    true_log = df_temp.loc[first_label:last_label]

    # A top is classified as found if it falls within +- 5feet or +-10 samples
    # around the true top: the bar marks that band (width 10 in depth units),
    # as tall as the largest GR value of the whole well.
    bar = go.Bar(x=[true_top], y=[df_temp["GR"].max()], name=top_name, width=10)

    data = [go.Scatter(x=true_log.DEPTH, y=true_log.GR), bar]

    fig = go.Figure(data=data, layout=layout)
    iplot(fig)
    
# Preview the first ten wells, with a window of 1000 row labels on each side of the top.
for well in wellist[:10]:
    plot_pred(well, top, 1000)

In [10]:
# 3D visualization: scatter
from ipaddress import collapse_addresses
import numpy as np
import math
import utm

def get_utm_coord_in_feet(df_loc, zone_number=31, zone_letter='U'):
    """Project well positions to planar UTM coordinates, in feet, zero-based.

    Args:
        df_loc: DataFrame with 'Latitude' and 'Longitude' columns.
        zone_number: UTM zone number forced for every point (default 31), so
            that all wells share one coordinate system.
        zone_letter: UTM zone letter forced for every point (default 'U').

    Returns:
        A tuple ``(x, y)`` of two lists in the row order of ``df_loc``:
        eastings and northings in feet, each shifted so that its smallest
        value is 0. Both lists are empty when ``df_loc`` has no rows.
    """
    feet_per_meter = 3.28084

    x = []
    y = []
    for latitude, longitude in zip(df_loc['Latitude'], df_loc['Longitude']):
        easting, northing, _, _ = utm.from_latlon(latitude, longitude, zone_number, zone_letter)
        # convert meter to feet
        x.append(easting * feet_per_meter)
        y.append(northing * feet_per_meter)

    # Compute each offset once; it used to be recomputed for every element.
    x_min = min(x, default=0.0)
    y_min = min(y, default=0.0)
    x = [x_ - x_min for x_ in x]
    y = [y_ - y_min for y_ in y]
    return x, y

# One 3D point cloud per formation top; all tops share the wells' planar
# coordinates (feet, zero-based) returned by get_utm_coord_in_feet.
data = []
x, y = get_utm_coord_in_feet(df_loc)
# The loop variable is deliberately not called `top`, so that the notebook-level
# `top` selected in earlier cells is not overwritten.
for top_name in ['MARCEL', 'CONRAD', 'SYLVAIN']:
    # NOTE: adds the top as a column of df_loc (aligned on wellName); the
    # summary cells further down read these columns from df_loc.
    df_loc[top_name] = df_tops[top_name]
    data.append(go.Scatter3d(
        x=x,
        y=y,
        # depths are negated so that deeper points are drawn lower
        z=df_loc[top_name] * -1,
        mode='markers',
        name=top_name,
        showlegend=True,
        marker=dict(
            size=2,
            # NOTE(review): a single number per trace, not a per-point array --
            # confirm that the colorscale has the intended effect here.
            color=df_loc[top_name].mean(),
            colorscale='Viridis',   # choose a colorscale
            # opacity=0.8
        )
    ))

fig = go.Figure(data=data)

# The axes of a 3D figure belong to layout.scene, so the cartesian
# update_yaxes(scaleanchor="x", scaleratio=1) had no effect on this plot.
# Equal x/y scaling is obtained by sizing the scene box proportionally to the
# horizontal extents (x and y start at 0, so max() is the extent); the
# vertical axis keeps the full box height so that depth relief stays readable.
x_span = max(x, default=0.0)
y_span = max(y, default=0.0)
longest_span = max(x_span, y_span) or 1.0
fig.update_layout(
    scene=dict(
        aspectmode='manual',
        aspectratio=dict(
            x=(x_span / longest_span) or 1.0,
            y=(y_span / longest_span) or 1.0,
            z=1.0,
        ),
    )
)
fig.show()

In [None]:
# # 3D visualization: surface
# from ipaddress import collapse_addresses
# import numpy as np
# import math
# import utm

# top = "SYLVAIN"
# df_loc[top] = df_tops[top]

# def get_utm_coord_in_feet(df_loc):
#     x = list()
#     y = list()
#     for idx, row in df_loc.iterrows():
#         easting, northing, _, _ = utm.from_latlon(row['Latitude'], row['Longitude'], 31, 'U')
#         # convert meter to feet
#         x.append(easting * 3.28084)
#         y.append(northing * 3.28084)
#     x = [x_ - min(x) for x_ in x]
#     y = [y_ - min(y) for y_ in y]
#     return x, y


# # step1: transform lon/lat to planar x/y coordinates
# # the maximum distance between two wells is around 45 km
# # Latitude/Longitude to UTM coordinates
# # x = list()
# # y = list()
# # for idx, row in df_loc.iterrows():
# #     easting, northing, _, _ = utm.from_latlon(row['Latitude'], row['Longitude'], 31, 'U')
# #     # convert meter to feet
# #     x.append(easting * 3.28084)
# #     y.append(northing * 3.28084)
# # print(x[0])
# # print(y[0])
# # using equirectangular projection (https://stackoverflow.com/questions/16266809/convert-from-latitude-longitude-to-x-y)
# # R_earth_feet = 20925721.784777
# # center_lat = df_loc['Latitude'].mean()
# # x = df_loc['Longitude'] * R_earth_feet * math.cos(math.radians(center_lat))
# # y = df_loc['Latitude'] * R_earth_feet

# # color_dict = 
# data = []
# x, y = get_utm_coord_in_feet(df_loc)
# for top in ['MARCEL', 'CONRAD', 'SYLVAIN']:
#     df_loc[top] = df_tops[top]
#     data.append(go.Scatter3d(
#         x=x, 
#         y=y, 
#         z=df_loc[top] * -1, 
#         mode='lines', 
#         name=top, 
#         showlegend=True, 
#         marker=dict(
#             size=2,
#             color=df_loc[top].mean(),                # set color to an array/list of desired values
#             colorscale='Viridis',   # choose a colorscale
#             # opacity=0.8
#         )
#     ))
# # step2: interpolate missing data
# # z = df_loc[top]

# # step3: plot
# fig = go.Figure(data=data)

# fig.show()

In [202]:
# Number of wells with a value in each column (the tops are not known everywhere).
non_null_counts = df_loc.notna().sum()
print(non_null_counts)

Latitude     600
Longitude    600
SYLVAIN      538
MARCEL       599
CONRAD       596
dtype: int64


In [203]:
# Count the wells where the first top of each pair is deeper than the second.
# A comparison involving a missing value is False, so only wells with both
# tops known can be counted.
ordering_checks = [
    ('MARCEL', 'CONRAD'),
    ('MARCEL', 'SYLVAIN'),
    ('SYLVAIN', 'CONRAD'),
]
for upper_top, lower_top in ordering_checks:
    print(len(df_loc[df_loc[upper_top] > df_loc[lower_top]]))

# Show the wells that break the usual SYLVAIN-above-CONRAD order.
print(df_loc[df_loc['SYLVAIN'] > df_loc['CONRAD']])

# Depth order among non-null values: CONRAD is deepest, then SYLVAIN, then MARCEL.
# Only 2 wells out of 600 are exceptions.

0
0
2
              Latitude  Longitude  SYLVAIN  MARCEL  CONRAD
wellName                                                  
512325378.0  48.728121   2.178195   6794.0  6509.0  6792.0
512330897.0  48.713464   2.207007   6617.0  6351.0  6367.0


In [9]:
from hacktops.evaluate import recall_tops

# Sanity check of the metric: score the reference tops against themselves
# (df_tops is passed twice), with a tolerance of 10.
# NOTE(review): the tolerance is presumably in depth units -- confirm in hacktops.evaluate.
recall, mae, df_res = recall_tops(df_tops,df_tops,tolerance = 10)
print("recall {0}, mae {1}".format(recall,mae))
# per-well detail for the first 50 wells
df_res.head(50)

recall 1.0, mae 0.0


Unnamed: 0_level_0,MARCEL,CONRAD,SYLVAIN,MARCEL_pred,CONRAD_pred,SYLVAIN_pred,MARCEL_ae,MARCEL_tp,CONRAD_ae,CONRAD_tp,SYLVAIN_ae,SYLVAIN_tp
wellName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
512320698.0,6398.0,6661.0,6638.0,6398.0,6661.0,6638.0,0.0,True,0.0,True,0.0,True
512320699.0,6407.0,6676.0,6656.0,6407.0,6676.0,6656.0,0.0,True,0.0,True,0.0,True
512320928.0,6520.0,6793.0,6746.0,6520.0,6793.0,6746.0,0.0,True,0.0,True,0.0,True
512320970.0,6544.0,6816.0,6770.0,6544.0,6816.0,6770.0,0.0,True,0.0,True,0.0,True
512320971.0,6510.0,6789.0,6742.0,6510.0,6789.0,6742.0,0.0,True,0.0,True,0.0,True
512320972.0,6488.0,6760.0,6716.0,6488.0,6760.0,6716.0,0.0,True,0.0,True,0.0,True
512320973.0,6466.0,6735.0,6688.0,6466.0,6735.0,6688.0,0.0,True,0.0,True,0.0,True
512320974.0,6512.0,6782.0,6739.0,6512.0,6782.0,6739.0,0.0,True,0.0,True,0.0,True
512320980.0,6590.0,,6812.0,6590.0,,6812.0,0.0,True,,False,0.0,True
512321007.0,6445.0,6710.0,6664.0,6445.0,6710.0,6664.0,0.0,True,0.0,True,0.0,True
