In [0]:
import numpy as np
import pandas as pd
import urllib, json

#This line will have the full data set printed out. 
#Toggle off and clear output if you no longer want to see full dataset
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [0]:
#read in the csv
raw_csv = 'Pumpage_Data_WillCounty.csv'

#Create a copy of the data is not edited
#This will be needed for McKaleigh for 3d
raw_data = pd.read_csv(raw_csv)

print('size of original dataframe')
print(raw_data.shape)



#This copy of the data we will edit heavily 
raw_data_change = pd.read_csv(raw_csv)

#drop rows if the depth, lamx, or lamy is unknown
raw_data_change.dropna(subset=['depth_total_last_known', 'lam_x', 'lam_y'], inplace = True)
    
#with this line I am dropping all of the rows that are deep wells (>400 ft)
raw_data_change.drop(raw_data_change[raw_data_change['depth_total_last_known'] > 400].index, inplace = True) 

print('size of updated dataframe')
print(raw_data_change.shape)

size of original dataframe
(675, 46)
size of updated dataframe
(336, 46)


In [0]:
#Here I am taking out the more recent years
subset = raw_data_change[['2013', '2014', '2015', '2016', '2017', '2018', '2019']]

#Here I dropping the orignal recent years from the dataframe
modify3 = raw_data_change.drop(['2013', '2014', '2015', '2016', '2017', '2018', '2019'], axis='columns')


#To bridge the more recent data, I'm doing a forward fill, back fill, and subbing in 0s
subset.fillna(method='ffill', axis='columns', inplace=True, limit=3, downcast=None)
subset.fillna(method='bfill', axis='columns', inplace=True, limit=3, downcast=None)
subset.fillna(value = 0, axis=1, inplace=True)


#This is replacing my updated recent years and forming a new dataframe
df = pd.concat([modify3, subset], axis = 1)

#This will fill into for up to two years of lapsed reporting data, the rest NaNs become 0
df = df.fillna(method='ffill', axis='columns', limit=1)
df = df.fillna(method='bfill', axis='columns', limit=1)
df= df.fillna(0)


print("Contents of the Modified Dataframe : ")
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


Contents of the Modified Dataframe : 


Unnamed: 0,p_num,isws_facility_id,owner,fac_well_num,depth_total_last_known,lam_x,lam_y,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,152530,19713228.0,BALMORAL WOODS COUNTRY CLUB,1,300.0,3513064.0,3053266.0,3053266.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6750000.0,6750000.0,3150000.0,18000000.0,4500000.0,4500000.0,4500000.0,4500000.0,0.0,0.0,0.0
1,152562,19713228.0,BALMORAL WOODS COUNTRY CLUB,4,120.0,3513839.0,3050592.0,3050592.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18000.0,18000.0,18000.0,18000.0,18000.0,18000.0,18000.0,18000.0,0.0,0.0,0.0
2,158221,19714241.0,HANSON MATERIAL SERVICE - YARD 61,6,153.0,3389816.0,3121532.0,3121532.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,149000.0,149000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,223202,19714241.0,HANSON MATERIAL SERVICE - YARD 61,5,159.0,3386729.0,3135944.0,3135944.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,190000.0,190000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,224255,19714515.0,CRETE-MONEE HIGH SCHOOL,2,360.0,3506761.0,3068194.0,3068194.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,224256,19714515.0,CRETE-MONEE HIGH SCHOOL,1,250.0,3506761.0,3066874.0,3066874.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,304771,19713228.0,BALMORAL WOODS COUNTRY CLUB,5,180.0,3514499.0,3050592.0,3050592.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3750.0,3750.0,3750.0,3750.0,3750.0,3750.0,3750.0,3750.0,0.0,0.0,0.0
8,338236,19770000.0,FRANKFORT PARK DISTRICT,2,220.0,3460783.0,3081317.0,3081317.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,972000.0,972000.0,972000.0,1800000.0,1800000.0,1800000.0,50000.0,93000.0,700000.0,700000.0,700000.0,700000.0,0.0,0.0,0.0
9,345572,19713330.0,ELWOOD ENERGY LLC,3,120.0,3377663.0,3065950.0,3065950.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67200.0,67200.0,67200.0,110000.0,110000.0,110000.0,110000.0,110000.0,100000.0,110000.0,110000.0,110000.0,110000.0,110000.0,110000.0
10,359321,19795400.0,COLLEGE VIEW SUBD,4,305.0,3385049.0,3125841.0,3125841.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1932000.0,1932000.0,1932000.0,1932000.0,4453100.0,4586400.0,3108300.0,3995500.0,3687600.0,3328200.0,2607500.0,2122000.0,2929200.0,3214600.0,2667100.0,2386300.0
