# Load *.csv and clean column names

## Load Data From *.csv

In [1]:
import pandas as pd
#pd.set_option('display.max_columns', None)  

### Load *.csv from URL

In [2]:
url = "https://factpages.npd.no/ReportServer_npdpublic?/FactPages/TableView/wellbore_exploration_all&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&rs:Format=CSV&Top100=false&IpAddress=82.102.27.246&CultureCode=en"
wellbore_exploration_all = pd.read_csv(url)
wellbore_exploration_all.head()

Unnamed: 0,wlbWellboreName,wlbWell,wlbDrillingOperator,wlbProductionLicence,wlbPurpose,wlbStatus,wlbContent,wlbWellType,wlbSubSea,wlbEntryDate,...,wlbNpdidWellbore,dscNpdidDiscovery,fldNpdidField,fclNpdidFacilityDrilling,wlbNpdidWellboreReclass,prlNpdidProductionLicence,wlbNpdidSiteSurvey,wlbDateUpdated,wlbDateUpdatedMax,datesyncNPD
0,1/2-1,1/2-1,Phillips Petroleum Norsk AS,143,WILDCAT,P&A,OIL,EXPLORATION,NO,20.03.1989,...,1382,43814.0,3437650.0,296245.0,0,21956.0,,03.10.2019,03.10.2019,22.11.2019
1,1/2-2,1/2-2,Paladin Resources Norge AS,143 CS,WILDCAT,P&A,OIL SHOWS,EXPLORATION,NO,14.12.2005,...,5192,,,278245.0,0,2424919.0,,03.10.2019,03.10.2019,22.11.2019
2,1/3-1,1/3-1,A/S Norske Shell,011,WILDCAT,P&A,GAS,EXPLORATION,NO,06.07.1968,...,154,43820.0,,288604.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
3,1/3-2,1/3-2,A/S Norske Shell,011,WILDCAT,P&A,DRY,EXPLORATION,NO,14.05.1969,...,165,,,288847.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
4,1/3-3,1/3-3,Elf Petroleum Norge AS,065,WILDCAT,P&A,OIL,EXPLORATION,NO,22.08.1982,...,87,43826.0,1028599.0,288334.0,0,21316.0,,03.10.2019,03.10.2019,22.11.2019


### Load *.csv from local file 

In [3]:
local_relative_path = "./source/data/wellbore_exploration_all.csv"
wellbore_exploration_all = pd.read_csv(local_relative_path)
wellbore_exploration_all.head()

Unnamed: 0,wlbWellboreName,wlbWell,wlbDrillingOperator,wlbProductionLicence,wlbPurpose,wlbStatus,wlbContent,wlbWellType,wlbSubSea,wlbEntryDate,...,wlbNpdidWellbore,dscNpdidDiscovery,fldNpdidField,fclNpdidFacilityDrilling,wlbNpdidWellboreReclass,prlNpdidProductionLicence,wlbNpdidSiteSurvey,wlbDateUpdated,wlbDateUpdatedMax,datesyncNPD
0,1/2-1,1/2-1,Phillips Petroleum Norsk AS,143,WILDCAT,P&A,OIL,EXPLORATION,NO,20.03.1989,...,1382,43814.0,3437650.0,296245.0,0,21956.0,,03.10.2019,03.10.2019,22.11.2019
1,1/2-2,1/2-2,Paladin Resources Norge AS,143 CS,WILDCAT,P&A,OIL SHOWS,EXPLORATION,NO,14.12.2005,...,5192,,,278245.0,0,2424919.0,,03.10.2019,03.10.2019,22.11.2019
2,1/3-1,1/3-1,A/S Norske Shell,011,WILDCAT,P&A,GAS,EXPLORATION,NO,06.07.1968,...,154,43820.0,,288604.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
3,1/3-2,1/3-2,A/S Norske Shell,011,WILDCAT,P&A,DRY,EXPLORATION,NO,14.05.1969,...,165,,,288847.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
4,1/3-3,1/3-3,Elf Petroleum Norge AS,065,WILDCAT,P&A,OIL,EXPLORATION,NO,22.08.1982,...,87,43826.0,1028599.0,288334.0,0,21316.0,,03.10.2019,03.10.2019,22.11.2019


## Clean Column Names

### Replace something with something - str_replace 

First we List our columns names and we spot there parts of the column names we would like to get rid of.

We need to clean our column names so we make are subsequent workflows on dataframes as easy as it gets.

**Task:**

**Remove wlb, fld, fcl prefixes.**

It is important that we wrap our chains of functions into parantheses (why?) Alternative approach would be to end each line with a backslash.

I find the first approach more elegant so I will use it.


In [4]:
wellbore_exploration_all.columns

Index(['wlbWellboreName', 'wlbWell', 'wlbDrillingOperator',
       'wlbProductionLicence', 'wlbPurpose', 'wlbStatus', 'wlbContent',
       'wlbWellType', 'wlbSubSea', 'wlbEntryDate', 'wlbCompletionDate',
       'wlbField', 'wlbDrillPermit', 'wlbDiscovery', 'wlbDiscoveryWellbore',
       'wlbBottomHoleTemperature', 'wlbSiteSurvey', 'wlbSeismicLocation',
       'wlbMaxInclation', 'wlbKellyBushElevation', 'wlbFinalVerticalDepth',
       'wlbTotalDepth', 'wlbWaterDepth', 'wlbKickOffPoint', 'wlbAgeAtTd',
       'wlbFormationAtTd', 'wlbMainArea', 'wlbDrillingFacility',
       'wlbFacilityTypeDrilling', 'wlbDrillingFacilityFixedOrMoveable',
       'wlbLicensingActivity', 'wlbMultilateral', 'wlbPurposePlanned',
       'wlbEntryYear', 'wlbCompletionYear', 'wlbReclassFromWellbore',
       'wlbReentryExplorationActivity', 'wlbPlotSymbol', 'wlbFormationWithHc1',
       'wlbAgeWithHc1', 'wlbFormationWithHc2', 'wlbAgeWithHc2',
       'wlbFormationWithHc3', 'wlbAgeWithHc3', 'wlbDrillingDays', 'wlbRee

In [5]:
wellbore_exploration_all.columns = (
    wellbore_exploration_all.columns
        .str.replace("wlb", "")
        .str.replace("fld", "")
        .str.replace("fcl", "")
        .str.replace("pr", "")
)

In [6]:
wellbore_exploration_all.columns

Index(['WellboreName', 'Well', 'DrillingOperator', 'ProductionLicence',
       'Purpose', 'Status', 'Content', 'WellType', 'SubSea', 'EntryDate',
       'CompletionDate', 'Field', 'DrillPermit', 'Discovery',
       'DiscoveryWellbore', 'BottomHoleTemperature', 'SiteSurvey',
       'SeismicLocation', 'MaxInclation', 'KellyBushElevation',
       'FinalVerticalDepth', 'TotalDepth', 'WaterDepth', 'KickOffPoint',
       'AgeAtTd', 'FormationAtTd', 'MainArea', 'DrillingFacility',
       'FacilityTypeDrilling', 'DrillingFacilityFixedOrMoveable',
       'LicensingActivity', 'Multilateral', 'PurposePlanned', 'EntryYear',
       'CompletionYear', 'ReclassFromWellbore', 'ReentryExplorationActivity',
       'PlotSymbol', 'FormationWithHc1', 'AgeWithHc1', 'FormationWithHc2',
       'AgeWithHc2', 'FormationWithHc3', 'AgeWithHc3', 'DrillingDays',
       'Reentry', 'LicenceTargetName', 'PluggedAbandonDate', 'PluggedDate',
       'GeodeticDatum', 'NsDeg', 'NsMin', 'NsSec', 'NsCode', 'EwDeg', 'EwMin',
 

In [7]:
wellbore_exploration_all.head()

Unnamed: 0,WellboreName,Well,DrillingOperator,ProductionLicence,Purpose,Status,Content,WellType,SubSea,EntryDate,...,NpdidWellbore,dscNpdidDiscovery,NpdidField,NpdidFacilityDrilling,NpdidWellboreReclass,lNpdidProductionLicence,NpdidSiteSurvey,DateUpdated,DateUpdatedMax,datesyncNPD
0,1/2-1,1/2-1,Phillips Petroleum Norsk AS,143,WILDCAT,P&A,OIL,EXPLORATION,NO,20.03.1989,...,1382,43814.0,3437650.0,296245.0,0,21956.0,,03.10.2019,03.10.2019,22.11.2019
1,1/2-2,1/2-2,Paladin Resources Norge AS,143 CS,WILDCAT,P&A,OIL SHOWS,EXPLORATION,NO,14.12.2005,...,5192,,,278245.0,0,2424919.0,,03.10.2019,03.10.2019,22.11.2019
2,1/3-1,1/3-1,A/S Norske Shell,011,WILDCAT,P&A,GAS,EXPLORATION,NO,06.07.1968,...,154,43820.0,,288604.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
3,1/3-2,1/3-2,A/S Norske Shell,011,WILDCAT,P&A,DRY,EXPLORATION,NO,14.05.1969,...,165,,,288847.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
4,1/3-3,1/3-3,Elf Petroleum Norge AS,065,WILDCAT,P&A,OIL,EXPLORATION,NO,22.08.1982,...,87,43826.0,1028599.0,288334.0,0,21316.0,,03.10.2019,03.10.2019,22.11.2019


### Clean column names - even more clean_names (janitor module)

Method [clean_names (janitor module)](https://pyjanitor.readthedocs.io/reference/janitor.functions/janitor.clean_names.html):

* takes all column names
* converts them to lowercase
* replaces all spaces with underscores

This method does not mutate the original DataFrame.

In [8]:
import janitor

In [9]:
wellbore_exploration_all = (
    wellbore_exploration_all.clean_names(case_type="snake")
)

In [10]:
wellbore_exploration_all.head()

Unnamed: 0,wellbore_name,well,drilling_operator,production_licence,purpose,status,content,well_type,sub_sea,entry_date,...,npdid_wellbore,dsc_npdid_discovery,npdid_field,npdid_facility_drilling,npdid_wellbore_reclass,l_npdid_production_licence,npdid_site_survey,date_updated,date_updated_max,datesync_npd
0,1/2-1,1/2-1,Phillips Petroleum Norsk AS,143,WILDCAT,P&A,OIL,EXPLORATION,NO,20.03.1989,...,1382,43814.0,3437650.0,296245.0,0,21956.0,,03.10.2019,03.10.2019,22.11.2019
1,1/2-2,1/2-2,Paladin Resources Norge AS,143 CS,WILDCAT,P&A,OIL SHOWS,EXPLORATION,NO,14.12.2005,...,5192,,,278245.0,0,2424919.0,,03.10.2019,03.10.2019,22.11.2019
2,1/3-1,1/3-1,A/S Norske Shell,011,WILDCAT,P&A,GAS,EXPLORATION,NO,06.07.1968,...,154,43820.0,,288604.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
3,1/3-2,1/3-2,A/S Norske Shell,011,WILDCAT,P&A,DRY,EXPLORATION,NO,14.05.1969,...,165,,,288847.0,0,20844.0,,03.10.2019,03.10.2019,22.11.2019
4,1/3-3,1/3-3,Elf Petroleum Norge AS,065,WILDCAT,P&A,OIL,EXPLORATION,NO,22.08.1982,...,87,43826.0,1028599.0,288334.0,0,21316.0,,03.10.2019,03.10.2019,22.11.2019


## Rename Column Names

### Rename columns directly 

Before:


In [11]:
(wellbore_exploration_all
    .filter(items=["drilling_operator", "purpose", "total_depth"])
).head()

Unnamed: 0,drilling_operator,purpose,total_depth
0,Phillips Petroleum Norsk AS,WILDCAT,3574.0
1,Paladin Resources Norge AS,WILDCAT,3434.0
2,A/S Norske Shell,WILDCAT,4877.0
3,A/S Norske Shell,WILDCAT,4297.0
4,Elf Petroleum Norge AS,WILDCAT,4876.0


After:

Pattern of renaming under **.rename(columns={"old_name", "new_name"})**

In [12]:
(wellbore_exploration_all
   .filter(items=["drilling_operator", "purpose", "total_depth"])
   .rename(columns={"purpose":"well_purpose"}) ##  "old_name":"new_name"
)

Unnamed: 0,drilling_operator,well_purpose,total_depth
0,Phillips Petroleum Norsk AS,WILDCAT,3574.0
1,Paladin Resources Norge AS,WILDCAT,3434.0
2,A/S Norske Shell,WILDCAT,4877.0
3,A/S Norske Shell,WILDCAT,4297.0
4,Elf Petroleum Norge AS,WILDCAT,4876.0
...,...,...,...
1917,Den norske stats oljeselskap a.s,WILDCAT,2919.0
1918,Statoil Petroleum AS,WILDCAT,2865.0
1919,Statoil Petroleum AS,WILDCAT,1210.0
1920,Equinor Energy AS,WILDCAT,4300.0


### Rename multiple columns by predefining a dictionary.

In [13]:
new_column_names = {
    "drilling_operator" : "DRILLING_COMPANY", 
    "purpose" : "WELL_PURPOSE",
    "total_depth" : "TOTAL_DEPTH_METERS"
}

(wellbore_exploration_all
   .filter(items=["drilling_operator", "purpose", "total_depth"])
   .rename(columns=new_column_names) ##  "old_name":"new_name"
)

Unnamed: 0,DRILLING_COMPANY,WELL_PURPOSE,TOTAL_DEPTH_METERS
0,Phillips Petroleum Norsk AS,WILDCAT,3574.0
1,Paladin Resources Norge AS,WILDCAT,3434.0
2,A/S Norske Shell,WILDCAT,4877.0
3,A/S Norske Shell,WILDCAT,4297.0
4,Elf Petroleum Norge AS,WILDCAT,4876.0
...,...,...,...
1917,Den norske stats oljeselskap a.s,WILDCAT,2919.0
1918,Statoil Petroleum AS,WILDCAT,2865.0
1919,Statoil Petroleum AS,WILDCAT,1210.0
1920,Equinor Energy AS,WILDCAT,4300.0


### export final csv

In [14]:
wellbore_exploration_all.to_csv("./source/data_processed/wellbore_exploration_all_clean_names.csv", index = False )