# Oil and Gas Visualization/Dashboard

### Import required libraries

In [3]:
import numpy as np
import pandas as pd
import plotly as py
import plotly.offline as pyo
import cufflinks as cf

### Import New York State dataset

In [4]:
df = pd.read_csv('data/wellspublic.csv', low_memory=False)
df.shape

(41716, 52)

In [5]:
df.columns

Index(['API_WellNo', 'Cnty', 'Hole', 'SideTrck', 'Completion', 'Well_Name',
       'Company_name', 'Operator_number', 'Well_Type', 'Map_Symbol',
       'Well_Status', 'Date_Status', 'Date_Permit_Application',
       'Permit_Issued', 'Date_Spudded', 'Date_Total_Depth',
       'Date_Well_Completed', 'Date_well_plugged', 'Date_well_confidential',
       'confid', 'town', 'quad', 'quadsec', 'Producing_name',
       'Producing_formation', 'Financial_security', 'Slant', 'County',
       'Region', 'State_lease', 'Proposed_depth', 'Surface_location',
       'Surface_Longitude', 'Surface_latitude', 'Bottom_hole_location',
       'Bottom_hole_longitude', 'Bottom_hole_latitude', 'True_vertical_depth',
       'Measured_depth', 'Kickoff', 'DrilledDepth', 'Elevation',
       'Original_well_type', 'Permit_Fee', 'Objective_formation', 'Depth_Fee',
       'Spacing', 'Spacing_Acres', 'Integration', 'Dt_Hearing', 'Dt_Mod',
       'LINK'],
      dtype='object')

### Make scattermapbox map

In [6]:
types = dict(
    BR = 'Brine',
    Confidential = 'Confidential',
    DH = 'Dry Hole',
    DS = 'Disposal',
    DW = 'Dry Wildcat',
    GD = 'Gas Development',
    GE = 'Gas Extension',
    GW = 'Gas Wildcat',
    IG = 'Gas Injection Well',
    IW = 'Enhanced Oil Recovery - Injection',
    LP = 'Liquefied Petroleum Gas Storage',
    MB = 'Monitoring Brine',
    MM = 'Monitoring Miscellaneous',
    MS = 'Monitoring Storage',
    NL = 'Not Listed',
    OB = 'Observation Well',
    OD = 'Oil Development',
    OE = 'Oil Extension',
    OW = 'Oil Wildcat',
    SG = 'Stratigraphic',
    ST = 'Storage',
    TH = 'Geothermal',
    UN = 'Unknown',
)

traces = []
for well, df in df.groupby('Well_Type'):
    trace = dict(
        type = 'scattermapbox',
        lon = df['Surface_Longitude'],
        lat = df['Surface_latitude'],
        text = df['Well_Name'],
        name = types[well],
        marker = dict(
            size = 4,
            opacity = 0.6,
        )
    )
    traces.append(trace)

# trace = dict(
#     type = 'scattermapbox',
#     lon = df['Surface_Longitude'],
#     lat = df['Surface_latitude'],
#     name = df['Well_Name'],
# )

# traces = [trace]

In [7]:
mapbox_access_token = 'pk.eyJ1IjoiamFja2x1byIsImEiOiJjaXhzYTB0bHcwOHNoMnFtOWZ3YWdreDB3In0.pjROwb9_CEuyKPE-x0lRUw'

layout = dict(

    title = "New York Oil and Gas map",

    # GENERAL LAYOUT
    width = 1280,
    height = 720,
    autosize = True,
    font = dict(
        family = "Overpass",
        size = 12,
        color = '#CCCCCC',
    ),
    margin = dict(
        t = 80,
        l = 40,
        b = 40,
        r = 120,
        pad = 0, 
    ),

    # OPTIONAL
    hovermode = "closest",
    
    # COLOR THEME
    plot_bgcolor = "#191A1A",
    paper_bgcolor = "#020202",
    
    # LEGEND
    legend = dict(
        x = 1.02,
        y = 1,
        font = dict(size = 10),
    ),

    # MAPBOX
    mapbox = dict(
        accesstoken = mapbox_access_token,
        style = "dark",
        center = dict(
            lon = -76.40,
            lat = 42.70,
        ),
        zoom = 5.5,
    ),

)

In [8]:
figure = dict(data=traces, layout=layout)
py.plot(figure, filename='Oil map')

AttributeError: module 'plotly' has no attribute 'plot'

In [47]:
import os
import pandas as pd

spo2 = pd.read_csv('data/data_spo2.csv')
spo2.head()

Unnamed: 0,Date,SPO2Values
0,2019-01-01,88.003045
1,2019-01-02,88.36018
2,2019-01-03,85.780407
3,2019-01-04,86.116537
4,2019-01-05,87.400621


In [49]:
spo2.index = pd.to_datetime(spo2['Date'])
#spo2.groupby(pd.Grouper(freq='M'))

In [50]:
spo2g = spo2.groupby(by = [spo2.index.year, spo2.index.month])

In [None]:
b.groupby(by=[b.index.year, b.index.month])

In [11]:
os.listdir('data')

['dash-logo copy.png',
 'data_fev1.csv',
 'data_spo2.csv',
 'OilandGasMetadata.html',
 'Oil_and_Gas_Annual_Production__1985_-_2000.csv',
 'Oil_and_Gas_Annual_Production__Beginning_2001.csv',
 'points.pkl',
 'wellspublic.csv']

In [None]:
.sum()
       .reset_index()
       .sort_values('Date')

In [51]:
spo2g.mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,SPO2Values
Date,Date,Unnamed: 2_level_1
2019,1,87.237432
2019,2,87.719323
2019,3,87.890801
2019,4,88.125869
2019,5,87.781507
2019,6,87.828938
2019,7,88.039412
2019,8,88.351156
2019,9,89.025581
2019,10,89.749489


In [39]:
dict(list(spo2g)).keys()

dict_keys([(1, 2019), (1, 2020), (2, 2019), (2, 2020), (3, 2019), (3, 2020), (4, 2019), (4, 2020), (5, 2019), (5, 2020), (6, 2019), (6, 2020), (7, 2019), (7, 2020), (8, 2019), (8, 2020), (9, 2019), (9, 2020), (10, 2019), (10, 2020), (11, 2019), (12, 2019)])