## Convert CSV to GeoJSON

In [47]:
import pandas
import json

In [51]:
data_frame = pandas.read_csv('data/raw/NitratesGroundwater.csv')
data_frame[data_frame.columns[:]].head()

Unnamed: 0.1,Unnamed: 0,@id,sample.samplingPoint,sample.samplingPoint.notation,sample.samplingPoint.label,sample.sampleDateTime,determinand.label,determinand.definition,determinand.notation,resultQualifier.notation,result,codedResultInterpretation.interpretation,determinand.unit.label,sample.sampledMaterialType.label,sample.isComplianceSample,sample.purpose.label,sample.samplingPoint.easting,sample.samplingPoint.northing
0,1,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-01-12 09:45:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085
1,2,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-02-04 09:55:00,Nitrate-N,Nitrate as N,117,,0.498,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085
2,3,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-03-17 10:05:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085
3,4,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-04-20 09:25:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085
4,5,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-05-24 13:25:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085


In [52]:
data_frame.dtypes

Unnamed: 0                                    int64
@id                                          object
sample.samplingPoint                         object
sample.samplingPoint.notation                object
sample.samplingPoint.label                   object
sample.sampleDateTime                        object
determinand.label                            object
determinand.definition                       object
determinand.notation                          int64
resultQualifier.notation                     object
result                                      float64
codedResultInterpretation.interpretation    float64
determinand.unit.label                       object
sample.sampledMaterialType.label             object
sample.isComplianceSample                      bool
sample.purpose.label                         object
sample.samplingPoint.easting                  int64
sample.samplingPoint.northing                 int64
dtype: object

In [56]:
import os
import pandas as pd
import pyproj

inProj = pyproj.Proj(init='epsg:27700')
outProj = pyproj.Proj(init='epsg:4326')

def proj_transform(row):
    long, lat = transform(inProj, outProj, row["sample.samplingPoint.easting"], row["sample.samplingPoint.northing"])
    
    row['long'] = long
    row['lat'] = lat
    return pd.Series(row)

data_frame_4326 = data_frame.apply(proj_transform, axis=1)

In [57]:
data_frame_4326.head()

Unnamed: 0.1,Unnamed: 0,@id,sample.samplingPoint,sample.samplingPoint.notation,sample.samplingPoint.label,sample.sampleDateTime,determinand.label,determinand.definition,determinand.notation,resultQualifier.notation,result,codedResultInterpretation.interpretation,determinand.unit.label,sample.sampledMaterialType.label,sample.isComplianceSample,sample.purpose.label,sample.samplingPoint.easting,sample.samplingPoint.northing,long,lat
0,1,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-01-12 09:45:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085,-0.282177,52.631056
1,2,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-02-04 09:55:00,Nitrate-N,Nitrate as N,117,,0.498,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085,-0.282177,52.631056
2,3,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-03-17 10:05:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085,-0.282177,52.631056
3,4,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-04-20 09:25:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085,-0.282177,52.631056
4,5,http://environment.data.gov.uk/water-quality/d...,http://environment.data.gov.uk/water-quality/i...,AN-FOXCVERT,HELPSTON FOX COVERT B/H,2000-05-24 13:25:00,Nitrate-N,Nitrate as N,117,<,0.1,,mg/l,GROUNDWATER,False,PLANNED INVESTIGATION (LOCAL MONITORING),516360,305085,-0.282177,52.631056


In [None]:
data_frame_4326.rename(columns={'Unnamed: 0':'id'}, inplace=True)

In [11]:
json_result_string = data_frame_4326.to_json(
    orient='records', 
    double_precision=12,
    date_format='iso'
)
json_result = json.loads(json_result_string)

In [12]:
geojson = {
    'type': 'FeatureCollection',
    'features': []
}
for record in json_result:
    geojson['features'].append({
        'type': 'Feature',
        'properties' : {     # Can also be written as 'properties': record to include all
            'id' : recrod['Unnamed: 0'],
            'X1' : record['X1'],
            'samplingPointNotation' : record['samplingPointNotation'],
            'samplingPointLabel' : record['samplingPointLabel'],
            'sampleDateTime' : record['sampleDateTime'],
            'determinandLabel' : record['determinandLabel'],
            'result' : record['result'],
            'sampledMaterialTypeLabel' : record['sampledMaterialTypeLabel']
        },
        'geometry': {
            'type': 'Point',
            'coordinates': [record['lon'], record['lat']]
        }
    })

In [13]:
geojson

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'X1': 78.0,
    'samplingPointNotation': 'AN-01M02',
    'samplingPointLabel': 'SYRESHAM STR.TRIB.OUSE.A43 RD.BR.KINDSHL',
    'sampleDateTime': '2010-06-15 14:25:00',
    'determinandLabel': 'Nitrate-N',
    'result': 2.59,
    'sampledMaterialTypeLabel': 'RIVER / RUNNING SURFACE WATER'},
   'geometry': {'type': 'Point',
    'coordinates': [-1.070689549604, 52.067306389346]}},
  {'type': 'Feature',
   'properties': {'X1': 79.0,
    'samplingPointNotation': 'AN-01M02',
    'samplingPointLabel': 'SYRESHAM STR.TRIB.OUSE.A43 RD.BR.KINDSHL',
    'sampleDateTime': '2010-06-15 14:25:00',
    'determinandLabel': 'Nitrite-N',
    'result': 0.028,
    'sampledMaterialTypeLabel': 'RIVER / RUNNING SURFACE WATER'},
   'geometry': {'type': 'Point',
    'coordinates': [-1.070689549604, 52.067306389346]}},
  {'type': 'Feature',
   'properties': {'X1': 92.0,
    'samplingPointNotation': 'AN-01M02',
    'samplingPointLabe

In [14]:
with open('test.geojson', 'w') as json_file:  
    json.dump(geojson, json_file, indent=4)

In [15]:
print("Min is: " + str(data_frame['result'].min()))
print("Mean is: " + str(data_frame['result'].mean()))
print("Max is: " + str(data_frame['result'].max()))

Min is: 1e-05
Mean is: 3.166230152827276
Max is: 24700.0


In [58]:
data_frame_4326.to_csv("data/NitratesGroundwater_4326.csv")