In [8]:
import matplotlib.pyplot as plt

%matplotlib inline 

In [9]:
import pandas as pd

# Load the three input tables (logs, well locations, tops) from parquet.
parquet_paths = (
    "../data/logs.parquet",
    "../data/loc.parquet",
    "../data/tops.parquet",
)
df_logs, df_loc, df_tops = (pd.read_parquet(path) for path in parquet_paths)

In [10]:
# --- Cleaning ---------------------------------------------------------
# Keep only log rows whose GR reading compares >= 0 (negative and missing
# readings fail the comparison and are dropped).
non_negative_gr = df_logs['GR'].ge(0)
df_logs = df_logs[non_negative_gr]

# One row per well present in BOTH tables, matched on the index.
well_data = pd.merge(df_loc, df_tops, how='inner', left_index=True, right_index=True)

In [11]:
# Summary statistics of the well coordinates, one row per coordinate.
well_data.loc[:, ['Latitude', 'Longitude']].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Latitude,600.0,48.71,0.050344,48.615293,48.669651,48.70962,48.751321,48.858803
Longitude,600.0,2.165,0.056001,2.088408,2.122508,2.159478,2.195293,2.416898


In [5]:
# --- Log length ---------------------------------------------------------
# Number of non-null DEPTH samples per well, then averaged over all wells.
samples_per_well = df_logs.groupby('wellName')['DEPTH'].count()
samples_per_well.mean()

12788.996666666666

In [None]:
# Distinct well names, in order of first appearance in the logs.
unique_names = df_logs['wellName'].drop_duplicates()
well_names = unique_names.tolist()

In [None]:
# Peek at the first rows of the location table.
df_loc.head(n=5)

In [None]:
# df_logs[df_logs['wellName'].isin(well_names)].groupby('wellName')['DEPTH'].agg(['min', 'max'])

In [None]:
# Per-well minimum of every column, broadcast back onto each log row.
logs_by_well = df_logs.groupby('wellName')
logs_by_well.transform('min')

In [None]:
# Summary statistics of the tops table, one row per column.
df_tops.describe().transpose()

In [None]:
# Min / max / mean DEPTH per well, restricted to wells listed in well_names.
in_well_names = df_logs['wellName'].isin(well_names)
stats_depth = (
    df_logs[in_well_names]
    .groupby('wellName')['DEPTH']
    .agg(['min', 'max', 'mean'])
)

In [None]:
# Logged interval per well: deepest sample minus shallowest sample.
stats_depth['length'] = stats_depth['max'].sub(stats_depth['min'])

In [None]:
# First rows of the per-well depth statistics.
stats_depth.head(n=5)

In [None]:
# Distribution of the per-well depth statistics across wells.
stats_depth.describe().transpose()

In [None]:
# One GR-versus-DEPTH figure for each of the first five wells.
for well_name in well_names[:5]:
    well_trace = df_logs.loc[df_logs['wellName'] == well_name]
    well_trace.plot(x='DEPTH', y='GR', figsize=(20, 10), title=str(well_name))

In [None]:
# Summary statistics of the tops table (same view as shown earlier).
tops_summary = df_tops.describe()
tops_summary.T

In [None]:
# Histogram of every tops column; binding the result keeps the axes repr
# out of the cell output.
tops_hist_axes = df_tops.hist(figsize=(20, 10))

In [12]:
# Plotly setup: offline helpers (iplot renders figures in the notebook) and
# the graph-objects namespace used by the plotting cells below.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Must run before the first iplot() call.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm for the installed plotly version.
init_notebook_mode(connected=True)

# NOTE(review): `pyl` is not referenced in any visible cell, and this module
# is believed to have moved to the separate chart-studio package in
# plotly >= 4 — confirm before upgrading or removing.
import plotly.plotly as pyl

# Map of well locations coloured by the depth of one formation top.
top = "MARCEL"

# Attach the top's depth to a *separate* frame (aligned on the index) instead
# of writing a new column into df_loc: the raw location table stays untouched,
# so re-running the earlier df_loc/df_tops merge cannot hit duplicate columns.
map_frame = df_loc.assign(**{top: df_tops[top]})

data = [dict(
    type='scattergeo',
    lon=map_frame['Longitude'],
    lat=map_frame['Latitude'],
    text=map_frame[top],  # shown on hover
    mode='markers',
    marker=dict(
        color=map_frame[top],
        # The colour scale belongs to the marker; a layout-level `colorbar`
        # key is not a plotly attribute and was silently ignored.
        showscale=True,
    ),
)]

layout = dict(
    title='{0} Depth information<br>(Hover for depth info)'.format(top),
    geo=dict(
        scope='europe',
        projection=dict(type='natural earth', scale=100),
        # Centre the view on the mean well position.
        center=dict(lon=map_frame["Longitude"].mean(), lat=map_frame["Latitude"].mean()),
        showland=True,
        landcolor="rgb(250, 250, 250)",
        subunitcolor="rgb(217, 217, 217)",
        countrycolor="rgb(217, 217, 217)",
        countrywidth=1.5,
        subunitwidth=0.5,
    ),
)

fig = dict(data=data, layout=layout)
# validate=False is kept from the original call.
iplot(fig, validate=False, filename='niobrara')

In [None]:
# Wells to iterate over: every index label of the tops table.
wellist = df_tops.index.tolist()

# Short, wide strip layout shared by the per-well figures drawn by plot_pred.
strip_margin = go.layout.Margin(l=50, r=50, b=20, t=0, pad=4)
layout = go.Layout(height=120, margin=strip_margin)

def plot_pred(wellname,top_name,wsize):
    """Plot the GR log of one well in a window around a formation top.

    Reads the module-level ``df_logs``, ``df_tops`` and ``layout`` and renders
    a plotly figure with ``iplot``: the GR-versus-DEPTH trace of the window
    plus a one-unit-wide bar placed at the top's depth.

    Parameters
    ----------
    wellname
        Well identifier: a label of ``df_tops``' index and a value of
        ``df_logs["wellName"]``.
    top_name
        Column of ``df_tops`` holding the depth of the top to display; also
        used as the legend name of the bar.
    wsize
        Half-width of the window, expressed in ``df_logs`` index labels
        (rows labelled ``ctr - wsize`` .. ``ctr + wsize`` are plotted).

    Returns
    -------
    None
        Nothing is drawn when the top depth is not strictly positive (this
        also covers NaN) or when the well has no log rows.
    """
    well_logs = df_logs[df_logs["wellName"] == wellname]
    true_top = df_tops.loc[wellname, top_name]

    # `NaN > 0` is False, so missing tops are skipped along with non-positive ones.
    if not true_top > 0 or well_logs.empty:
        return

    # Centre on the sample closest to the top. An exact `DEPTH == true_top`
    # match fails whenever the top falls between samples or on a filtered-out
    # row; when an exact match exists this selects the same (first) row.
    ctr = (well_logs["DEPTH"] - true_top).abs().idxmin()
    true_log = well_logs.loc[ctr - wsize:ctr + wsize]

    # Bar height is the well's overall GR maximum; it is labelled with the
    # requested top rather than the notebook-level `top` variable.
    bar = go.Bar(x=[true_top], y=[well_logs["GR"].max()], name=top_name, width=1)
    data = [go.Scatter(x=true_log.DEPTH, y=true_log.GR), bar]

    fig = go.Figure(data=data, layout=layout)
    iplot(fig)
    
# Draw the window around the selected top for the first ten wells.
for well in wellist[:10]:
    plot_pred(wellname=well, top_name=top, wsize=100)

In [None]:
from hacktops.evaluate import recall_tops

# Score df_tops against itself with a tolerance of 10.
# NOTE(review): presumably a sanity check of the metric (identical inputs) —
# confirm the argument order expected by recall_tops.
scores = recall_tops(df_tops, df_tops, tolerance=10)
recall, mae, df_res = scores
print("recall {0}, mae {1}".format(recall, mae))
df_res.head(n=50)

In [None]:
# Summary statistics of the evaluation result for the three tops.
df_res.loc[:, ['MARCEL', 'CONRAD', 'SYLVAIN']].describe().transpose()

In [None]:
# Histograms of the evaluation result for the three tops.
res_top_columns = ['MARCEL', 'CONRAD', 'SYLVAIN']
df_res[res_top_columns].hist(figsize=(20, 10))