## Dependencies & Setup

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load Oil Prices Data File

In [44]:
# Relative path of the CSV holding the oil / DOW price series
oil_dow_prices_file = 'data/oil_dow_prices.csv'

# Load the CSV into a pandas DataFrame
oil_prices = pd.read_csv(filepath_or_buffer=oil_dow_prices_file)

## Clean Up & Inspect Oil Prices Data

In [45]:
# Drop unwanted columns and rename the remaining ones in a single
# non-mutating chain. errors='ignore' keeps this cell re-runnable: with an
# in-place drop, a second execution raises KeyError because the columns
# are already gone.
oil_prices = (
    oil_prices
    .drop(columns=['Unnamed: 0', 'Close Price'], errors='ignore')
    .rename(columns={'date': 'Date', 'Crude_Oil_Price': 'Crude Oil Price'})
)

# Filter DataFrame for oil prices between 2010 and 2017 (both bounds inclusive).
# NOTE: 'Date' is stored as ISO-formatted strings (YYYY-MM-DD), so this is a
# lexicographic comparison that happens to match chronological order; it
# would stop being correct if the date format changed.
start_date = "2010-01-01"
end_date = "2017-12-01"
oil_prices = oil_prices.loc[oil_prices['Date'].between(start_date, end_date)]

# Summarise the filtered frame, then print it
oil_prices.info()
print(oil_prices)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96 entries, 300 to 395
Data columns (total 2 columns):
Date               96 non-null object
Crude Oil Price    96 non-null float64
dtypes: float64(1), object(1)
memory usage: 2.2+ KB
           Date  Crude Oil Price
300  2010-01-01            72.87
301  2010-02-01            72.74
302  2010-03-01            75.77
303  2010-04-01            78.80
304  2010-05-01            70.91
..          ...              ...
391  2017-08-01            44.96
392  2017-09-01            47.17
393  2017-10-01            49.12
394  2017-11-01            55.19
395  2017-12-01            56.98

[96 rows x 2 columns]


## Load Oil Pipeline Accidents Data File

In [46]:
# Relative path of the CSV holding the oil pipeline accident records
oil_pipeline_accidents_file = 'data/Oil_Pipeline_Accidents_2010-2017.csv'

# Load the CSV into a pandas DataFrame
oil_accidents = pd.read_csv(filepath_or_buffer=oil_pipeline_accidents_file)

## Clean Up Oil Pipeline Accidents Data

In [47]:
# Print a summary of the Oil Accidents DataFrame: index range, column
# names, per-column non-null counts and dtypes
oil_accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2795 entries, 0 to 2794
Data columns (total 48 columns):
Report Number                           2795 non-null int64
Supplemental Number                     2795 non-null int64
Accident Year                           2795 non-null int64
Accident Date/Time                      2795 non-null object
Operator ID                             2795 non-null int64
Operator Name                           2795 non-null object
Pipeline/Facility Name                  2674 non-null object
Pipeline Location                       2795 non-null object
Pipeline Type                           2777 non-null object
Liquid Type                             2795 non-null object
Liquid Subtype                          1349 non-null object
Liquid Name                             222 non-null object
Accident City                           2480 non-null object
Accident County                         2720 non-null object
Accident State                          2783

In [48]:
# Unwanted columns to remove from the Oil Accidents DataFrame
unwanted_accident_columns = [
    'Report Number',
    'Supplemental Number',
    'Unintentional Release (Barrels)',
    'Intentional Release (Barrels)',
    'Liquid Recovery (Barrels)',
    'Liquid Ignition',
    'Liquid Explosion',
    'Pipeline Shutdown',
    'Shutdown Date/Time',
    'Restart Date/Time',
]

# Drop without mutating in place. errors='ignore' keeps this cell
# re-runnable: with an in-place drop, a second execution raises KeyError
# because the columns are already gone.
oil_accidents = oil_accidents.drop(columns=unwanted_accident_columns, errors='ignore')

# TODO: fill missing/blank values. This step is intentionally not applied
# yet; an earlier draft (a fill_values dict passed to fillna) was left
# commented out with an incomplete dict literal. Columns it intended to fill
# with 'Not Available': 'Pipeline/Facility Name', 'Pipeline Type',
# 'Liquid Subtype', 'Liquid Name', 'Accident City', 'Accident County',
# 'Accident State'.

# Summarise the remaining columns
oil_accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2795 entries, 0 to 2794
Data columns (total 38 columns):
Accident Year                           2795 non-null int64
Accident Date/Time                      2795 non-null object
Operator ID                             2795 non-null int64
Operator Name                           2795 non-null object
Pipeline/Facility Name                  2674 non-null object
Pipeline Location                       2795 non-null object
Pipeline Type                           2777 non-null object
Liquid Type                             2795 non-null object
Liquid Subtype                          1349 non-null object
Liquid Name                             222 non-null object
Accident City                           2480 non-null object
Accident County                         2720 non-null object
Accident State                          2783 non-null object
Accident Latitude                       2795 non-null float64
Accident Longitude                      2