# Cleaning MagnaProbe Data
## This notebook demonstrates how the pandas and geopandas Python libraries clean up 'raw' MagnaProbe data.
### Notice how the DataFrame changes in each cell. New columns are appended to the end.

In [1]:
# Import the project's cleaning helpers and print the module summary so the
# functions used in the cells below are listed up front.
import magnaprobe
help(magnaprobe)

Help on module magnaprobe:

NAME
    magnaprobe - # A Module to Clean MagnaProbe Data

FUNCTIONS
    consolidate_coords(df)
        Consolidate coordinate information that is split across many columns
    
    convert_depth_cm_to_m(df)
        Convert snow depth in cm to snow depth in m. cm column is retained.
    
    convert_wgs_to_utm(gdf, epsg_code)
        Convert WGS 84 GeoDataFrame to UTM GeoDataFrame
    
    create_geodataframe(df)
        Create GeoDataFrame with WGS 84 Spatial Reference
    
    create_geometry(df)
        Add Geometry column to specify lat and lon are special, i.e. point vector data
    
    read_tabular(raw_file, header_row)
        Read 'raw' MagnaProbe data (*. xls, *.dat, *.csv, etc.) to DataFrame
    
    save_as_csv(df, out_dst)
    
    save_as_shp(gdf, out_dst)
    
    strip_junk_rows(raw_df, first_n_rows)
        Drop n header rows that are not needed
    
    trim_cols(df, cols_to_keep)

FILE
    /home/cparr/workspace/magnaprobe_tools/magnaprobe-

In [2]:
# Load the example logger export into a DataFrame; header_row selects the row
# that read_tabular treats as the column header.
raw = magnaprobe.read_tabular(
    raw_file='example_data/Geo1_6_raw.dat',
    header_row=1,
)
raw.head()

Unnamed: 0,TIMESTAMP,RECORD,Counter,DepthCm,BattVolts,latitude_a,latitude_b,Longitude_a,Longitude_b,fix_quality,...,altitudeB,DepthVolts,LatitudeDDDDD,LongitudeDDDDD,month,dayofmonth,hourofday,minutes,seconds,microseconds
0,TS,RN,,,,degrees,minutes,degrees,minutes,unitless,...,,,,,,,,,,
1,,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,...,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp
2,2017-11-14 15:15:18.75,7025,100001,34.74,13.64,65,2.4709,-147,-25.0207,1,...,726.1,2.267,0.04118167,-0.4170117,11,14,15,15,18,750000
3,2017-11-14 15:16:08,7026,100002,34.68,13.61,65,2.4709,-147,-25.0207,1,...,726.8,2.264,0.04118167,-0.4170117,11,14,15,16,8,0
4,2017-11-14 15:16:08.25,7027,110001,35.79,13.64,65,2.4702,-147,-25.0186,2,...,730.1,2.336,0.04117,-0.4169767,11,14,15,16,8,250000


In [3]:
# Drop the two leading rows, which hold units / sample-type labels rather than
# measurements.
# NOTE(review): the result is not reassigned, yet raw.head() below no longer
# shows those rows, so this call appears to modify `raw` in place. Re-running
# this cell on the same kernel would presumably strip further rows — confirm,
# and re-run the read_tabular cell first if in doubt.
magnaprobe.strip_junk_rows(raw, 2)
raw.head()

Unnamed: 0,TIMESTAMP,RECORD,Counter,DepthCm,BattVolts,latitude_a,latitude_b,Longitude_a,Longitude_b,fix_quality,...,altitudeB,DepthVolts,LatitudeDDDDD,LongitudeDDDDD,month,dayofmonth,hourofday,minutes,seconds,microseconds
2,2017-11-14 15:15:18.75,7025,100001,34.74,13.64,65,2.4709,-147,-25.0207,1,...,726.1,2.267,0.04118167,-0.4170117,11,14,15,15,18,750000
3,2017-11-14 15:16:08,7026,100002,34.68,13.61,65,2.4709,-147,-25.0207,1,...,726.8,2.264,0.04118167,-0.4170117,11,14,15,16,8,0
4,2017-11-14 15:16:08.25,7027,110001,35.79,13.64,65,2.4702,-147,-25.0186,2,...,730.1,2.336,0.04117,-0.4169767,11,14,15,16,8,250000
5,2017-11-14 15:16:09,7028,110002,35.74,13.63,65,2.4696,-147,-25.0183,2,...,731.5,2.333,0.04116,-0.4169716,11,14,15,16,9,0
6,2017-11-14 15:16:09.5,7029,110003,35.74,13.63,65,2.4699,-147,-25.0188,2,...,731.6,2.333,0.041165,-0.41698,11,14,15,16,9,500000


In [4]:
# Merge the split coordinate columns; the output below shows lower-cased column
# names plus new 'Latitude' and 'Longitude' columns at the end.
df = magnaprobe.consolidate_coords(df=raw)
df.head()

Unnamed: 0,timestamp,record,counter,depthcm,battvolts,latitude_a,latitude_b,longitude_a,longitude_b,fix_quality,...,latitudeddddd,longitudeddddd,month,dayofmonth,hourofday,minutes,seconds,microseconds,Latitude,Longitude
2,2017-11-14 15:15:18.75,7025,100001,34.74,13.64,65,2.4709,-147,-25.0207,1,...,0.04118167,-0.4170117,11,14,15,15,18,750000,65.041182,-146.582988
3,2017-11-14 15:16:08,7026,100002,34.68,13.61,65,2.4709,-147,-25.0207,1,...,0.04118167,-0.4170117,11,14,15,16,8,0,65.041182,-146.582988
4,2017-11-14 15:16:08.25,7027,110001,35.79,13.64,65,2.4702,-147,-25.0186,2,...,0.04117,-0.4169767,11,14,15,16,8,250000,65.04117,-146.583023
5,2017-11-14 15:16:09,7028,110002,35.74,13.63,65,2.4696,-147,-25.0183,2,...,0.04116,-0.4169716,11,14,15,16,9,0,65.04116,-146.583028
6,2017-11-14 15:16:09.5,7029,110003,35.74,13.63,65,2.4699,-147,-25.0188,2,...,0.041165,-0.41698,11,14,15,16,9,500000,65.041165,-146.58302


In [5]:
# Add a snow depth column in metres ('Snow Depth m'); the cm column is retained.
depth_df = magnaprobe.convert_depth_cm_to_m(df=df)
depth_df.head()

Unnamed: 0,timestamp,record,counter,depthcm,battvolts,latitude_a,latitude_b,longitude_a,longitude_b,fix_quality,...,longitudeddddd,month,dayofmonth,hourofday,minutes,seconds,microseconds,Latitude,Longitude,Snow Depth m
2,2017-11-14 15:15:18.75,7025,100001,34.74,13.64,65,2.4709,-147,-25.0207,1,...,-0.4170117,11,14,15,15,18,750000,65.041182,-146.582988,0.3474
3,2017-11-14 15:16:08,7026,100002,34.68,13.61,65,2.4709,-147,-25.0207,1,...,-0.4170117,11,14,15,16,8,0,65.041182,-146.582988,0.3468
4,2017-11-14 15:16:08.25,7027,110001,35.79,13.64,65,2.4702,-147,-25.0186,2,...,-0.4169767,11,14,15,16,8,250000,65.04117,-146.583023,0.3579
5,2017-11-14 15:16:09,7028,110002,35.74,13.63,65,2.4696,-147,-25.0183,2,...,-0.4169716,11,14,15,16,9,0,65.04116,-146.583028,0.3574
6,2017-11-14 15:16:09.5,7029,110003,35.74,13.63,65,2.4699,-147,-25.0188,2,...,-0.41698,11,14,15,16,9,500000,65.041165,-146.58302,0.3574


In [6]:
# Pick the columns that survive the trim. Add others (e.g. the voltage) if you
# need them, or adjust the names if your MagnaProbe is programmed differently.
keep_columns = ['timestamp', 'counter', 'Latitude', 'Longitude', 'Snow Depth m']
clean_df = magnaprobe.trim_cols(df=depth_df, cols_to_keep=keep_columns)
clean_df.head()

Unnamed: 0,timestamp,counter,Latitude,Longitude,Snow Depth m
2,2017-11-14 15:15:18.75,100001,65.041182,-146.582988,0.3474
3,2017-11-14 15:16:08,100002,65.041182,-146.582988,0.3468
4,2017-11-14 15:16:08.25,110001,65.04117,-146.583023,0.3579
5,2017-11-14 15:16:09,110002,65.04116,-146.583028,0.3574
6,2017-11-14 15:16:09.5,110003,65.041165,-146.58302,0.3574


In [7]:
# Append a 'geometry' column of point data built from the lat / lon values.
geom_df = magnaprobe.create_geometry(df=clean_df)
geom_df.head()

Unnamed: 0,timestamp,counter,Latitude,Longitude,Snow Depth m,geometry
2,2017-11-14 15:15:18.75,100001,65.041182,-146.582988,0.3474,POINT (-146.5829883 65.04118167)
3,2017-11-14 15:16:08,100002,65.041182,-146.582988,0.3468,POINT (-146.5829883 65.04118167)
4,2017-11-14 15:16:08.25,110001,65.04117,-146.583023,0.3579,POINT (-146.5830233 65.04116999999999)
5,2017-11-14 15:16:09,110002,65.04116,-146.583028,0.3574,POINT (-146.5830284 65.04116)
6,2017-11-14 15:16:09.5,110003,65.041165,-146.58302,0.3574,POINT (-146.58302 65.04116500000001)


In [8]:
# Promote the frame to a GeoDataFrame carrying the WGS 84 spatial reference.
geo_df = magnaprobe.create_geodataframe(df=geom_df)
geo_df.head()

Unnamed: 0,timestamp,counter,Latitude,Longitude,Snow Depth m,geometry
2,2017-11-14 15:15:18.75,100001,65.041182,-146.582988,0.3474,POINT (-146.58299 65.04118)
3,2017-11-14 15:16:08,100002,65.041182,-146.582988,0.3468,POINT (-146.58299 65.04118)
4,2017-11-14 15:16:08.25,110001,65.04117,-146.583023,0.3579,POINT (-146.58302 65.04117)
5,2017-11-14 15:16:09,110002,65.04116,-146.583028,0.3574,POINT (-146.58303 65.04116)
6,2017-11-14 15:16:09.5,110003,65.041165,-146.58302,0.3574,POINT (-146.58302 65.04117)


In [9]:
# EPSG code for UTM Zone 6 N; the geometry is reprojected from WGS 84 into it.
utm_zone_6n_epsg = 32606
utm_geo_df = magnaprobe.convert_wgs_to_utm(gdf=geo_df, epsg_code=utm_zone_6n_epsg)
utm_geo_df.head()

Unnamed: 0,timestamp,counter,Latitude,Longitude,Snow Depth m,geometry
2,2017-11-14 15:15:18.75,100001,65.041182,-146.582988,0.3474,POINT (519634.488 7213108.989)
3,2017-11-14 15:16:08,100002,65.041182,-146.582988,0.3468,POINT (519634.488 7213108.989)
4,2017-11-14 15:16:08.25,110001,65.04117,-146.583023,0.3579,POINT (519632.849 7213107.678)
5,2017-11-14 15:16:09,110002,65.04116,-146.583028,0.3574,POINT (519632.616 7213106.562)
6,2017-11-14 15:16:09.5,110003,65.041165,-146.58302,0.3574,POINT (519633.008 7213107.122)
