## Importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
import os
import warnings
warnings.filterwarnings('ignore')

## Reading the clipped geodataframes - first 15 chunks

In [2]:
# Load chunks 1-15; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(1, 16):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem1, chem2, chem3, chem4, chem5,
 chem6, chem7, chem8, chem9, chem10,
 chem11, chem12, chem13, chem14, chem15) = chem

## Concatenating the first chunks into a single geodataframe

In [3]:
# Stack chunks 1-15 row-wise into one frame; each chunk keeps its own row labels.
chemI = pd.concat(
    objs=[
        chem1, chem2, chem3, chem4, chem5,
        chem6, chem7, chem8, chem9, chem10,
        chem11, chem12, chem13, chem14, chem15,
    ],
)

Checking the head of the geodataframe

In [4]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemI.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,14,2078806,DC,Drill core,,,,,,,,,,WESTERN MINING CORPORATION LTD.,RA25046,22/08/2008,280578.0,WRD30,1327.0,1328.0,1984090,687490.6,6605633.24,53,136.957005,-30.667316,136.956995,-30.667302,1636520,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",U3O8,5,ppm,XRF1,X-Ray Fluorescence Spectrometry,POINT (136.95700 -30.66732)
1,37,2078802,DC,Drill core,,,,,,,,,,WESTERN MINING CORPORATION LTD.,RA25042,22/08/2008,280578.0,WRD30,1323.0,1324.0,1984090,687490.6,6605633.24,53,136.957005,-30.667316,136.956995,-30.667302,1636516,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",U3O8,5,ppm,XRF1,X-Ray Fluorescence Spectrometry,POINT (136.95700 -30.66732)
2,38,2078803,DC,Drill core,,,,,,,,,,WESTERN MINING CORPORATION LTD.,RA25043,22/08/2008,280578.0,WRD30,1324.0,1325.0,1984090,687490.6,6605633.24,53,136.957005,-30.667316,136.956995,-30.667302,1636517,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",U3O8,5,ppm,XRF1,X-Ray Fluorescence Spectrometry,POINT (136.95700 -30.66732)
3,39,2078804,DC,Drill core,,,,,,,,,,WESTERN MINING CORPORATION LTD.,RA25044,22/08/2008,280578.0,WRD30,1325.0,1326.0,1984090,687490.6,6605633.24,53,136.957005,-30.667316,136.956995,-30.667302,1636518,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",U3O8,5,ppm,XRF1,X-Ray Fluorescence Spectrometry,POINT (136.95700 -30.66732)
4,40,2078805,DC,Drill core,,,,,,,,,,WESTERN MINING CORPORATION LTD.,RA25045,22/08/2008,280578.0,WRD30,1326.0,1327.0,1984090,687490.6,6605633.24,53,136.957005,-30.667316,136.956995,-30.667302,1636519,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",U3O8,5,ppm,XRF1,X-Ray Fluorescence Spectrometry,POINT (136.95700 -30.66732)


## Counting how many times sample numbers appear on the table (first geodataframe - 15 chunks)

As the table above shows, the dataset is arranged so that each row holds the reading for a single element. Therefore, if a sample is analyzed for more than one element, its identification number (the "SAMPLE_NO" column) is repeated in subsequent rows.

Thus, the number of times a sample number is repeated is exactly the number of chemical species in the assay.

In this first group of 15 chunks, samples have between 1 and 60 elements read.

In [5]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemI['SAMPLE_NO'].value_counts()

1707879    60
1707887    60
1707877    60
1707878    60
1707880    60
           ..
1932358     1
1932357     1
1932356     1
1932355     1
1367794     1
Name: SAMPLE_NO, Length: 205261, dtype: int64

## Ruling out samples with fewer than 25 assayed elements (first geodataframe - 15 chunks)

In [6]:
# Rows per sample number; each row is one assayed element.
counts = chemI['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemI = chemI[chemI['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [7]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemI['SAMPLE_NO'].value_counts()

1707886    60
1707880    60
1707885    60
1707884    60
1707883    60
           ..
522077     25
522076     25
522075     25
522074     25
696908     25
Name: SAMPLE_NO, Length: 4073, dtype: int64

## Repeating the algorithm throughout the entire database (153 chunks)

* Reading 15 CSV files (chunks) in a for loop and converting them into geospatial data
* Concatenating them into one single dataframe
* Checking the head of the data and value counts on the "SAMPLE_NO" column (optional)
* Selecting only the samples that repeat at least 25 times (at least 25 elements) and assigning the result back to the group's variable

In [8]:
# Load chunks 16-30; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(16, 31):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem16, chem17, chem18, chem19, chem20,
 chem21, chem22, chem23, chem24, chem25,
 chem26, chem27, chem28, chem29, chem30) = chem

In [9]:
# Stack chunks 16-30 row-wise into one frame; each chunk keeps its own row labels.
chemII = pd.concat(
    objs=[
        chem16, chem17, chem18, chem19, chem20,
        chem21, chem22, chem23, chem24, chem25,
        chem26, chem27, chem28, chem29, chem30,
    ],
)

In [10]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemII.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,170839,2332954,DC,Drill core,III,Igneous Intermediate Intrusive,QZDI,,Quartz Diorite,,Mh,?,Hiltaba Suite,MONAX ALLIANCE PTY LTD.,139611,10/04/2015,293946.0,MCDD1501,799.0,800.0,2044215,614702.77,6674712.55,53,136.189868,-30.053295,136.189858,-30.053281,1866779,1162.0/150,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Ba,1200.1,ppm,FB6/MS,"Lithium borate fusion, ICP-MS determination",POINT (136.18987 -30.05330)
1,170840,2332954,DC,Drill core,III,Igneous Intermediate Intrusive,QZDI,,Quartz Diorite,,Mh,?,Hiltaba Suite,MONAX ALLIANCE PTY LTD.,139611,10/04/2015,293946.0,MCDD1501,799.0,800.0,2044215,614702.77,6674712.55,53,136.189868,-30.053295,136.189858,-30.053281,1866779,1162.0/150,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Be,1.3,ppm,FB6/MS,"Lithium borate fusion, ICP-MS determination",POINT (136.18987 -30.05330)
2,170841,2332954,DC,Drill core,III,Igneous Intermediate Intrusive,QZDI,,Quartz Diorite,,Mh,?,Hiltaba Suite,MONAX ALLIANCE PTY LTD.,139611,10/04/2015,293946.0,MCDD1501,799.0,800.0,2044215,614702.77,6674712.55,53,136.189868,-30.053295,136.189858,-30.053281,1866779,1162.0/150,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Ce,55.6,ppm,FB6/MS,"Lithium borate fusion, ICP-MS determination",POINT (136.18987 -30.05330)
3,170842,2332954,DC,Drill core,III,Igneous Intermediate Intrusive,QZDI,,Quartz Diorite,,Mh,?,Hiltaba Suite,MONAX ALLIANCE PTY LTD.,139611,10/04/2015,293946.0,MCDD1501,799.0,800.0,2044215,614702.77,6674712.55,53,136.189868,-30.053295,136.189858,-30.053281,1866779,1162.0/150,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Eu,1.59,ppm,FB6/MS,"Lithium borate fusion, ICP-MS determination",POINT (136.18987 -30.05330)
4,170843,2332954,DC,Drill core,III,Igneous Intermediate Intrusive,QZDI,,Quartz Diorite,,Mh,?,Hiltaba Suite,MONAX ALLIANCE PTY LTD.,139611,10/04/2015,293946.0,MCDD1501,799.0,800.0,2044215,614702.77,6674712.55,53,136.189868,-30.053295,136.189858,-30.053281,1866779,1162.0/150,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Dy,5.32,ppm,FB6/MS,"Lithium borate fusion, ICP-MS determination",POINT (136.18987 -30.05330)


In [11]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemII['SAMPLE_NO'].value_counts()

1964573    38
2719354    34
2719375    34
2719377    34
2719378    34
           ..
2749799     1
2749819     1
2749822     1
2749823     1
2566034     1
Name: SAMPLE_NO, Length: 35045, dtype: int64

In [12]:
# Rows per sample number; each row is one assayed element.
counts = chemII['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemII = chemII[chemII['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [13]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemII['SAMPLE_NO'].value_counts()

1964573    38
2719612    34
2719359    34
2719350    34
2719351    34
           ..
2719489    34
2719242    33
2332956    29
2332955    29
2332954    29
Name: SAMPLE_NO, Length: 388, dtype: int64

In [14]:
# Load chunks 31-45; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(31, 46):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem31, chem32, chem33, chem34, chem35,
 chem36, chem37, chem38, chem39, chem40,
 chem41, chem42, chem43, chem44, chem45) = chem

In [15]:
# Stack chunks 31-45 row-wise into one frame; each chunk keeps its own row labels.
chemIII = pd.concat(
    objs=[
        chem31, chem32, chem33, chem34, chem35,
        chem36, chem37, chem38, chem39, chem40,
        chem41, chem42, chem43, chem44, chem45,
    ],
)

In [16]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemIII.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,2634441,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,SR1053259,12/01/2007,312946.0,RD2785,1735.0,1736.0,2090367,681844.73,6630406.93,53,136.893757,-30.444759,136.893747,-30.444745,2158878,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Fe,6.47,%,IC4BHP,IC4BHP,POINT (136.89376 -30.44476)
1,1,2634441,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,SR1053259,12/01/2007,312946.0,RD2785,1735.0,1736.0,2090367,681844.73,6630406.93,53,136.893757,-30.444759,136.893747,-30.444745,2158878,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",K,4.97,%,IC4BHP,IC4BHP,POINT (136.89376 -30.44476)
2,2,2634441,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,SR1053259,12/01/2007,312946.0,RD2785,1735.0,1736.0,2090367,681844.73,6630406.93,53,136.893757,-30.444759,136.893747,-30.444745,2158878,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Mg,0.27,%,IC4BHP,IC4BHP,POINT (136.89376 -30.44476)
3,3,2634441,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,SR1053259,12/01/2007,312946.0,RD2785,1735.0,1736.0,2090367,681844.73,6630406.93,53,136.893757,-30.444759,136.893747,-30.444745,2158878,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Mn,0.13,%,IC4BHP,IC4BHP,POINT (136.89376 -30.44476)
4,4,2634441,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,SR1053259,12/01/2007,312946.0,RD2785,1735.0,1736.0,2090367,681844.73,6630406.93,53,136.893757,-30.444759,136.893747,-30.444745,2158878,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Na,0.07,%,IC4BHP,IC4BHP,POINT (136.89376 -30.44476)


In [17]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemIII['SAMPLE_NO'].value_counts()

1833265    60
1833264    60
1833263    60
1833262    60
1840253    36
           ..
701104      1
1797920     1
1797929     1
1797938     1
699271      1
Name: SAMPLE_NO, Length: 172855, dtype: int64

In [18]:
# Rows per sample number; each row is one assayed element.
counts = chemIII['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemIII = chemIII[chemIII['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [19]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemIII['SAMPLE_NO'].value_counts()

1833265    60
1833264    60
1833263    60
1833262    60
366927     36
           ..
434080     25
434081     25
434082     25
434083     25
434108     25
Name: SAMPLE_NO, Length: 2503, dtype: int64

In [20]:
# Load chunks 46-60; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(46, 61):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem46, chem47, chem48, chem49, chem50,
 chem51, chem52, chem53, chem54, chem55,
 chem56, chem57, chem58, chem59, chem60) = chem

In [21]:
# Stack chunks 46-60 row-wise into one frame; each chunk keeps its own row labels.
chemIV = pd.concat(
    objs=[
        chem46, chem47, chem48, chem49, chem50,
        chem51, chem52, chem53, chem54, chem55,
        chem56, chem57, chem58, chem59, chem60,
    ],
)

In [22]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemIV.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,1876764,HCORE,Sawn half drill core,,,,,,,,,,TECK COMINCO AUSTRALIA PTY LTD.,B29947,10/06/2007,265060.0,CAR054,1074.0,1075.0,1788656,738097.59,6543302.44,53,137.499448,-31.220187,137.499438,-31.220173,1452454,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Th,58.0,ppm,D/MS,Oxidative alkaline fusion with sodium peroxide...,POINT (137.49945 -31.22019)
1,1,1876764,HCORE,Sawn half drill core,,,,,,,,,,TECK COMINCO AUSTRALIA PTY LTD.,B29947,10/06/2007,265060.0,CAR054,1074.0,1075.0,1788656,738097.59,6543302.44,53,137.499448,-31.220187,137.499438,-31.220173,1452454,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",U,52.5,ppm,D/MS,Oxidative alkaline fusion with sodium peroxide...,POINT (137.49945 -31.22019)
2,2,1876764,HCORE,Sawn half drill core,,,,,,,,,,TECK COMINCO AUSTRALIA PTY LTD.,B29947,10/06/2007,265060.0,CAR054,1074.0,1075.0,1788656,738097.59,6543302.44,53,137.499448,-31.220187,137.499438,-31.220173,1452454,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",V,40.0,ppm,D/MS,Oxidative alkaline fusion with sodium peroxide...,POINT (137.49945 -31.22019)
3,3,1876764,HCORE,Sawn half drill core,,,,,,,,,,TECK COMINCO AUSTRALIA PTY LTD.,B29947,10/06/2007,265060.0,CAR054,1074.0,1075.0,1788656,738097.59,6543302.44,53,137.499448,-31.220187,137.499438,-31.220173,1452454,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Y,58.0,ppm,D/MS,Oxidative alkaline fusion with sodium peroxide...,POINT (137.49945 -31.22019)
4,4,1876764,HCORE,Sawn half drill core,,,,,,,,,,TECK COMINCO AUSTRALIA PTY LTD.,B29947,10/06/2007,265060.0,CAR054,1074.0,1075.0,1788656,738097.59,6543302.44,53,137.499448,-31.220187,137.499438,-31.220173,1452454,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Zr,90.0,ppm,D/MS,Oxidative alkaline fusion with sodium peroxide...,POINT (137.49945 -31.22019)


In [23]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemIV['SAMPLE_NO'].value_counts()

2748270    60
2748292    60
2748251    60
2748293    60
2748284    60
           ..
2695497     1
2667031     1
2846093     1
2695026     1
2664895     1
Name: SAMPLE_NO, Length: 293198, dtype: int64

In [24]:
# Rows per sample number; each row is one assayed element.
counts = chemIV['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemIV = chemIV[chemIV['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [25]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemIV['SAMPLE_NO'].value_counts()

2748290    60
2748239    60
2748296    60
2748264    60
2748287    60
           ..
2022559    25
2022484    25
2022591    25
2022623    25
2022580    25
Name: SAMPLE_NO, Length: 1570, dtype: int64

In [26]:
# Load chunks 61-75; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(61, 76):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem61, chem62, chem63, chem64, chem65,
 chem66, chem67, chem68, chem69, chem70,
 chem71, chem72, chem73, chem74, chem75) = chem

In [27]:
# Stack chunks 61-75 row-wise into one frame; each chunk keeps its own row labels.
chemV = pd.concat(
    objs=[
        chem61, chem62, chem63, chem64, chem65,
        chem66, chem67, chem68, chem69, chem70,
        chem71, chem72, chem73, chem74, chem75,
    ],
)

In [28]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemV.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,2146277,DC,Drill core,,,,,,,,,,TASMAN RESOURCES NL.,230400,21/10/2007,253543.0,TI013,733.0,734.6,1612891,677532.6,6663338.34,53,136.843304,-30.148378,136.843294,-30.148364,1689746,,GEOCHEMISTRY,"Ultra Trace Pty Ltd, Canning Vale W.A.",Sb,0.2,ppm,ICP302,"Mixed acid digest, ICP-MS determination.",POINT (136.84330 -30.14838)
1,1,2146277,DC,Drill core,,,,,,,,,,TASMAN RESOURCES NL.,230400,21/10/2007,253543.0,TI013,733.0,734.6,1612891,677532.6,6663338.34,53,136.843304,-30.148378,136.843294,-30.148364,1689746,,GEOCHEMISTRY,"Ultra Trace Pty Ltd, Canning Vale W.A.",Sr,52.0,ppm,ICP302,"Mixed acid digest, ICP-MS determination.",POINT (136.84330 -30.14838)
2,2,2146277,DC,Drill core,,,,,,,,,,TASMAN RESOURCES NL.,230400,21/10/2007,253543.0,TI013,733.0,734.6,1612891,677532.6,6663338.34,53,136.843304,-30.148378,136.843294,-30.148364,1689746,,GEOCHEMISTRY,"Ultra Trace Pty Ltd, Canning Vale W.A.",U,7.2,ppm,ICP302,"Mixed acid digest, ICP-MS determination.",POINT (136.84330 -30.14838)
3,3,2146277,DC,Drill core,,,,,,,,,,TASMAN RESOURCES NL.,230400,21/10/2007,253543.0,TI013,733.0,734.6,1612891,677532.6,6663338.34,53,136.843304,-30.148378,136.843294,-30.148364,1689746,,GEOCHEMISTRY,"Ultra Trace Pty Ltd, Canning Vale W.A.",Y,15.0,ppm,ICP302,"Mixed acid digest, ICP-MS determination.",POINT (136.84330 -30.14838)
4,4,2146277,DC,Drill core,,,,,,,,,,TASMAN RESOURCES NL.,230400,21/10/2007,253543.0,TI013,733.0,734.6,1612891,677532.6,6663338.34,53,136.843304,-30.148378,136.843294,-30.148364,1689746,,GEOCHEMISTRY,"Ultra Trace Pty Ltd, Canning Vale W.A.",Yb,1.5,ppm,ICP302,"Mixed acid digest, ICP-MS determination.",POINT (136.84330 -30.14838)


In [29]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemV['SAMPLE_NO'].value_counts()

2079847    48
2079954    48
2079849    48
2079951    48
2079944    48
           ..
2695043     1
2346713     1
2170499     1
806132      1
844914      1
Name: SAMPLE_NO, Length: 127422, dtype: int64

In [30]:
# Rows per sample number; each row is one assayed element.
counts = chemV['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemV = chemV[chemV['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [31]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemV['SAMPLE_NO'].value_counts()

2079821    48
2079825    48
2079935    48
2079832    48
2079810    48
           ..
705419     25
705977     25
705162     25
705291     25
705053     25
Name: SAMPLE_NO, Length: 54471, dtype: int64

In [32]:
# Load chunks 76-90; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(76, 91):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem76, chem77, chem78, chem79, chem80,
 chem81, chem82, chem83, chem84, chem85,
 chem86, chem87, chem88, chem89, chem90) = chem

In [33]:
# Stack chunks 76-90 row-wise into one frame; each chunk keeps its own row labels.
chemVI = pd.concat(
    objs=[
        chem76, chem77, chem78, chem79, chem80,
        chem81, chem82, chem83, chem84, chem85,
        chem86, chem87, chem88, chem89, chem90,
    ],
)

In [34]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemVI.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,837,919557,CALC,Calcrete,,,,,,,,,,DOMINION MINING LTD.,G314999,,,,,,1055989,338018.75,6642292.54,53,133.314866,-30.340387,133.314857,-30.340373,539891,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Au,<0.001,ppm,B/ETA,Graphite furnace atomic absorption spectrometry,POINT (133.31487 -30.34039)
1,838,919558,CALC,Calcrete,,,,,,,,,,DOMINION MINING LTD.,G315002,,,,,,1055990,338118.82,6642087.49,53,133.315875,-30.34225,133.315866,-30.342236,539892,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Au,0.003,ppm,B/ETA,Graphite furnace atomic absorption spectrometry,POINT (133.31588 -30.34225)
2,839,919559,CALC,Calcrete,,,,,,,,,,DOMINION MINING LTD.,G315003,,,,,,1055991,337923.79,6642082.48,53,133.313846,-30.342269,133.313837,-30.342255,539893,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Au,0.005,ppm,B/ETA,Graphite furnace atomic absorption spectrometry,POINT (133.31385 -30.34227)
3,840,919560,CALC,Calcrete,,,,,,,,,,DOMINION MINING LTD.,G315004,,,,,,1055992,337728.77,6642083.57,53,133.311818,-30.342233,133.311809,-30.342219,539894,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Au,0.007,ppm,B/ETA,Graphite furnace atomic absorption spectrometry,POINT (133.31182 -30.34223)
4,841,919561,CALC,Calcrete,,,,,,,,,,DOMINION MINING LTD.,G315010,,,,,,1055993,337818.78,6641882.48,53,133.312723,-30.344059,133.312714,-30.344045,539895,,GEOCHEMISTRY,"Genalysis Laboratory Services Pty Ltd, W.A.",Au,<0.001,ppm,B/ETA,Graphite furnace atomic absorption spectrometry,POINT (133.31272 -30.34406)


In [35]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemVI['SAMPLE_NO'].value_counts()

2745629    62
2745643    62
2745702    62
2745780    62
2745647    62
           ..
991956      1
1440710     1
1440711     1
1440712     1
2552748     1
Name: SAMPLE_NO, Length: 169469, dtype: int64

In [36]:
# Rows per sample number; each row is one assayed element.
counts = chemVI['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemVI = chemVI[chemVI['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [37]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemVI['SAMPLE_NO'].value_counts()

2745628    62
2745749    62
2745627    62
2745702    62
2745629    62
           ..
1905184    25
1905185    25
1905186    25
1905187    25
1906292    25
Name: SAMPLE_NO, Length: 19259, dtype: int64

In [38]:
# Write each filtered group (I-VI) to its own CSV, in group order.
# NOTE(review): to_csv also writes the row index by default, and after pd.concat
# those labels repeat across chunks — confirm downstream readers expect that column.
filtered_outputs = [
    ("sarig_rs_chem_exp1_25elements.csv", chemI),
    ("sarig_rs_chem_exp2_25elements.csv", chemII),
    ("sarig_rs_chem_exp3_25elements.csv", chemIII),
    ("sarig_rs_chem_exp4_25elements.csv", chemIV),
    ("sarig_rs_chem_exp5_25elements.csv", chemV),
    ("sarig_rs_chem_exp6_25elements.csv", chemVI),
]
for csv_name, group_frame in filtered_outputs:
    group_frame.to_csv(csv_name)

In [2]:
# Load chunks 91-105; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(91, 106):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem91, chem92, chem93, chem94, chem95,
 chem96, chem97, chem98, chem99, chem100,
 chem101, chem102, chem103, chem104, chem105) = chem

In [3]:
# Stack chunks 91-105 row-wise into one frame; each chunk keeps its own row labels.
chemVII = pd.concat(
    objs=[
        chem91, chem92, chem93, chem94, chem95,
        chem96, chem97, chem98, chem99, chem100,
        chem101, chem102, chem103, chem104, chem105,
    ],
)

In [4]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemVII.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,2730121,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,CR94524,12/09/1980,315992.0,RD31A,478.0,484.0,2104468,682458.24,6629921.1,53,136.900228,-30.449048,136.900218,-30.449034,2246695,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Mg,0.2,%,IC4,"0.1g sample, lithium metaborate fusion followe...",POINT (136.90023 -30.44905)
1,1,2730121,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,CR94524,12/09/1980,315992.0,RD31A,478.0,484.0,2104468,682458.24,6629921.1,53,136.900228,-30.449048,136.900218,-30.449034,2246695,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Mn,0.01,%,IC4,"0.1g sample, lithium metaborate fusion followe...",POINT (136.90023 -30.44905)
2,2,2730121,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,CR94524,12/09/1980,315992.0,RD31A,478.0,484.0,2104468,682458.24,6629921.1,53,136.900228,-30.449048,136.900218,-30.449034,2246695,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Na,0.07,%,IC4,"0.1g sample, lithium metaborate fusion followe...",POINT (136.90023 -30.44905)
3,3,2730121,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,CR94524,12/09/1980,315992.0,RD31A,478.0,484.0,2104468,682458.24,6629921.1,53,136.900228,-30.449048,136.900218,-30.449034,2246695,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",P,0.11,%,IC4,"0.1g sample, lithium metaborate fusion followe...",POINT (136.90023 -30.44905)
4,4,2730121,DC,Drill core,,,,,,,,,,BHP BILLITON OD CORP. PTY LTD.,CR94524,12/09/1980,315992.0,RD31A,478.0,484.0,2104468,682458.24,6629921.1,53,136.900228,-30.449048,136.900218,-30.449034,2246695,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Si,26.4,%,IC4,"0.1g sample, lithium metaborate fusion followe...",POINT (136.90023 -30.44905)


In [5]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemVII['SAMPLE_NO'].value_counts()

861824     97
859869     96
861844     66
867193     66
860588     65
           ..
2859810     1
2865953     1
2863904     1
514733      1
2119691     1
Name: SAMPLE_NO, Length: 250541, dtype: int64

In [6]:
# Rows per sample number; each row is one assayed element.
counts = chemVII['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemVII = chemVII[chemVII['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [7]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemVII['SAMPLE_NO'].value_counts()

861824    97
859869    96
861844    66
867193    66
857190    65
          ..
657072    25
658345    25
658505    25
658277    25
61478     25
Name: SAMPLE_NO, Length: 40297, dtype: int64

In [8]:
# Load chunks 106-120; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(106, 121):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem106, chem107, chem108, chem109, chem110,
 chem111, chem112, chem113, chem114, chem115,
 chem116, chem117, chem118, chem119, chem120) = chem

In [9]:
# Stack chunks 106-120 row-wise into one frame; each chunk keeps its own row labels.
chemVIII = pd.concat(
    objs=[
        chem106, chem107, chem108, chem109, chem110,
        chem111, chem112, chem113, chem114, chem115,
        chem116, chem117, chem118, chem119, chem120,
    ],
)

In [10]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemVIII.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,2175169,CT,Drill cuttings,,,,,,,,,,INVESTIGATOR RESOURCES LTD.,435030,21/07/2015,290994.0,PPRC351,6.0,9.0,2025105,595891.52,6388105.16,53,136.022353,-32.640495,136.022343,-32.640481,1713207,467,GEOCHEMISTRY,"ALS Chemex, Perth WA",Ca,700,ppm,ME-MS61,"Four acid near total digest for 47 elements, c...",POINT (136.02235 -32.64050)
1,1,2175169,CT,Drill cuttings,,,,,,,,,,INVESTIGATOR RESOURCES LTD.,435030,21/07/2015,290994.0,PPRC351,6.0,9.0,2025105,595891.52,6388105.16,53,136.022353,-32.640495,136.022343,-32.640481,1713207,467,GEOCHEMISTRY,"ALS Chemex, Perth WA",Cd,<0.02,ppm,ME-MS61,"Four acid near total digest for 47 elements, c...",POINT (136.02235 -32.64050)
2,2,2175169,CT,Drill cuttings,,,,,,,,,,INVESTIGATOR RESOURCES LTD.,435030,21/07/2015,290994.0,PPRC351,6.0,9.0,2025105,595891.52,6388105.16,53,136.022353,-32.640495,136.022343,-32.640481,1713207,467,GEOCHEMISTRY,"ALS Chemex, Perth WA",Ce,22.4,ppm,ME-MS61,"Four acid near total digest for 47 elements, c...",POINT (136.02235 -32.64050)
3,3,2175169,CT,Drill cuttings,,,,,,,,,,INVESTIGATOR RESOURCES LTD.,435030,21/07/2015,290994.0,PPRC351,6.0,9.0,2025105,595891.52,6388105.16,53,136.022353,-32.640495,136.022343,-32.640481,1713207,467,GEOCHEMISTRY,"ALS Chemex, Perth WA",Co,1.7,ppm,ME-MS61,"Four acid near total digest for 47 elements, c...",POINT (136.02235 -32.64050)
4,4,2175169,CT,Drill cuttings,,,,,,,,,,INVESTIGATOR RESOURCES LTD.,435030,21/07/2015,290994.0,PPRC351,6.0,9.0,2025105,595891.52,6388105.16,53,136.022353,-32.640495,136.022343,-32.640481,1713207,467,GEOCHEMISTRY,"ALS Chemex, Perth WA",Cr,49,ppm,ME-MS61,"Four acid near total digest for 47 elements, c...",POINT (136.02235 -32.64050)


In [11]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemVIII['SAMPLE_NO'].value_counts()

2175512    46
2175660    46
2175658    46
2175657    46
2175656    46
           ..
2400667     1
2400666     1
2400665     1
2400664     1
2678404     1
Name: SAMPLE_NO, Length: 100906, dtype: int64

In [12]:
# Rows per sample number; each row is one assayed element.
counts = chemVIII['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemVIII = chemVIII[chemVIII['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [13]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemVIII['SAMPLE_NO'].value_counts()

2175031    46
2175610    46
2175588    46
2175589    46
2175590    46
           ..
924775     25
924774     25
924773     25
924772     25
925195     25
Name: SAMPLE_NO, Length: 2722, dtype: int64

In [14]:
# Load chunks 121-135; each CSV becomes a GeoDataFrame whose point geometry is built
# from its GDA2020 longitude/latitude columns.
chem = []
for chunk_no in range(121, 136):
    csv_name = "sarig_rs_chem_exp" + str(chunk_no) + "_ed1.csv"
    table = pd.read_csv(csv_name)
    points = gpd.points_from_xy(x=table.LONGITUDE_GDA2020, y=table.LATITUDE_GDA2020)
    chem.append(gpd.GeoDataFrame(table, geometry=points))

# Bind every chunk to its own name; the unpacking raises if the list does not hold exactly 15 frames.
(chem121, chem122, chem123, chem124, chem125,
 chem126, chem127, chem128, chem129, chem130,
 chem131, chem132, chem133, chem134, chem135) = chem

In [15]:
# Stack chunks 121-135 row-wise into one frame; each chunk keeps its own row labels.
chemIX = pd.concat(
    objs=[
        chem121, chem122, chem123, chem124, chem125,
        chem126, chem127, chem128, chem129, chem130,
        chem131, chem132, chem133, chem134, chem135,
    ],
)

In [16]:
# Let pandas render up to 40 columns so the wide table is shown in full.
pd.set_option('display.max_columns', 40) 
# Preview the first rows of the concatenated group.
chemIX.head()

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,13241,1421117,SO,Soil,,,,,,,,,,GRENFELL RESOURCES NL.,597601,16/01/2003,,,,,1145330,433450.76,6609001.53,53,134.305435,-30.649807,134.305426,-30.649793,1014702,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Ag,1.4,ppb,IC8/35,"Partial leach (MMI), ICP-MS determination (AAS...",POINT (134.30544 -30.64981)
1,13242,1421117,SO,Soil,,,,,,,,,,GRENFELL RESOURCES NL.,597601,16/01/2003,,,,,1145330,433450.76,6609001.53,53,134.305435,-30.649807,134.305426,-30.649793,1014702,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",As,141.0,ppb,IC8/35,"Partial leach (MMI), ICP-MS determination (AAS...",POINT (134.30544 -30.64981)
2,13243,1421117,SO,Soil,,,,,,,,,,GRENFELL RESOURCES NL.,597601,16/01/2003,,,,,1145330,433450.76,6609001.53,53,134.305435,-30.649807,134.305426,-30.649793,1014702,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Au,0.14,ppb,IC8/35,"Partial leach (MMI), ICP-MS determination (AAS...",POINT (134.30544 -30.64981)
3,13244,1421117,SO,Soil,,,,,,,,,,GRENFELL RESOURCES NL.,597601,16/01/2003,,,,,1145330,433450.76,6609001.53,53,134.305435,-30.649807,134.305426,-30.649793,1014702,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Bi,0.2,ppb,IC8/35,"Partial leach (MMI), ICP-MS determination (AAS...",POINT (134.30544 -30.64981)
4,13245,1421117,SO,Soil,,,,,,,,,,GRENFELL RESOURCES NL.,597601,16/01/2003,,,,,1145330,433450.76,6609001.53,53,134.305435,-30.649807,134.305426,-30.649793,1014702,,GEOCHEMISTRY,"AMDEL Laboratory, S.A.",Cd,9.6,ppb,IC8/35,"Partial leach (MMI), ICP-MS determination (AAS...",POINT (134.30544 -30.64981)


In [17]:
# Number of rows per sample number (one row per assayed element), most frequent first.
chemIX['SAMPLE_NO'].value_counts()

2915822    53
2916033    53
2916060    53
2916050    53
2916049    53
           ..
868910      1
868909      1
868908      1
868907      1
2536279     1
Name: SAMPLE_NO, Length: 207218, dtype: int64

In [18]:
# Rows per sample number; each row is one assayed element.
counts = chemIX['SAMPLE_NO'].value_counts()
# Keep only samples with at least 25 assayed elements.
# The samples to keep are selected directly instead of negating the ones to drop:
# value_counts() ignores missing sample numbers, so with the negated form any row
# whose SAMPLE_NO is NaN was never listed as "low count" and slipped through.
chemIX = chemIX[chemIX['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [19]:
# Re-count after filtering to check that every remaining sample has at least 25 rows.
chemIX['SAMPLE_NO'].value_counts()

2916063    53
2916401    53
2916032    53
2916033    53
2916034    53
           ..
1589143    25
1589142    25
1589141    25
1589140    25
2132072    25
Name: SAMPLE_NO, Length: 15232, dtype: int64

In [20]:
# Read raw export chunks 136-150 and wrap each one in a GeoDataFrame whose point
# geometry is built from the GDA2020 longitude/latitude columns.
chem = []
for i in range(136, 151):
    c = pd.read_csv("sarig_rs_chem_exp{}_ed1.csv".format(i))
    chem.append(
        gpd.GeoDataFrame(
            c,
            geometry=gpd.points_from_xy(x=c["LONGITUDE_GDA2020"], y=c["LATITUDE_GDA2020"]),
        )
    )

# Expose every chunk under its own name, in file order.
(
    chem136, chem137, chem138, chem139, chem140,
    chem141, chem142, chem143, chem144, chem145,
    chem146, chem147, chem148, chem149, chem150,
) = chem

In [21]:
# Stack chunks 136-150 (in file order) into the single frame for group X.
chemX = pd.concat(
    objs=[
        chem136, chem137, chem138, chem139, chem140,
        chem141, chem142, chem143, chem144, chem145,
        chem146, chem147, chem148, chem149, chem150,
    ]
)

In [22]:
# Allow up to 40 columns in rendered tables, then preview the first rows of group X.
pd.options.display.max_columns = 40
chemX.head(n=5)

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,POINT (nan nan)
1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,POINT (nan nan)
2,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,POINT (nan nan)
3,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,POINT (nan nan)
4,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,POINT (nan nan)


In [23]:
# Tally how many rows each SAMPLE_NO contributes to group X before filtering.
chemX.SAMPLE_NO.value_counts()

864544     54
1835135    40
1835086    40
1835093    40
2605256    40
           ..
1329387     1
1329386     1
1329385     1
1329384     1
1572864     1
Name: SAMPLE_NO, Length: 260006, dtype: int64

In [24]:
# Keep only the samples of group X that have at least 25 rows.
# The mask selects the SAMPLE_NO values to *keep* rather than negating the rare ones:
# value_counts() ignores missing values, so rows with a NaN SAMPLE_NO (this group's
# preview shows fully empty rows with a "POINT (nan nan)" geometry) never show up in
# `counts` and would pass straight through a negated isin() test. With a positive
# mask those empty rows are dropped together with the under-sampled ones.
counts = chemX['SAMPLE_NO'].value_counts()
chemX = chemX[chemX['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [25]:
# Re-check the per-sample row counts of group X after the filter.
chemX.SAMPLE_NO.value_counts()

864544     54
1835007    40
2605116    40
2605139    40
2605138    40
           ..
2191815    26
2191816    26
446047     25
446046     25
446048     25
Name: SAMPLE_NO, Length: 6308, dtype: int64

In [26]:
# Read raw export chunks 151-153 and wrap each one in a GeoDataFrame whose point
# geometry is built from the GDA2020 longitude/latitude columns.
chem = []
for i in range(151, 154):
    c = pd.read_csv("sarig_rs_chem_exp{}_ed1.csv".format(i))
    chem.append(
        gpd.GeoDataFrame(
            c,
            geometry=gpd.points_from_xy(x=c["LONGITUDE_GDA2020"], y=c["LATITUDE_GDA2020"]),
        )
    )

# Expose every chunk under its own name, in file order.
(chem151, chem152, chem153) = chem

In [28]:
# Stack chunks 151-153 (in file order) into the single frame for group XI.
chemXI = pd.concat(
    objs=[
        chem151,
        chem152,
        chem153,
    ]
)

In [29]:
# Allow up to 40 columns in rendered tables, then preview the first rows of group XI.
pd.options.display.max_columns = 40
chemXI.head(n=5)

Unnamed: 0.1,Unnamed: 0,SAMPLE_NO,SAMPLE_SOURCE_CODE,SAMPLE_SOURCE,ROCK_GROUP_CODE,ROCK_GROUP,LITHO_CODE,LITHO_CONF,LITHOLOGY_NAME,LITHO_MODIFIER,MAP_SYMBOL,STRAT_CONF,STRAT_NAME,COLLECTED_BY,COLLECTORS_NUMBER,COLLECTED_DATE,DRILLHOLE_NUMBER,DH_NAME,DH_DEPTH_FROM,DH_DEPTH_TO,SITE_NO,EASTING_GDA2020,NORTHING_GDA2020,ZONE_GDA2020,LONGITUDE_GDA2020,LATITUDE_GDA2020,LONGITUDE_GDA94,LATITUDE_GDA94,SAMPLE_ANALYSIS_NO,OTHER_ANALYSIS_ID,ANALYSIS_TYPE_DESC,LABORATORY,CHEM_CODE,VALUE,UNIT,CHEM_METHOD_CODE,CHEM_METHOD_DESC,geometry
0,393,2421078,DC,Drill core,,,,,,,,,,MINOTAUR EXPLORATION LTD.,TR003,13/10/2007,235676.0,WC07D01,351.0,352.0,1211836,593000.75,6737451.52,53,135.959361,-29.489005,135.959351,-29.488991,1951902,AD07139097,GEOCHEMISTRY,"ALS Chemex, Perth WA",Ag,<0.5,ppm,ME-ICP61,Four acid near total digestion for 34 elements...,POINT (135.95936 -29.48901)
1,394,2421078,DC,Drill core,,,,,,,,,,MINOTAUR EXPLORATION LTD.,TR003,13/10/2007,235676.0,WC07D01,351.0,352.0,1211836,593000.75,6737451.52,53,135.959361,-29.489005,135.959351,-29.488991,1951902,AD07139097,GEOCHEMISTRY,"ALS Chemex, Perth WA",Al,6.93,%,ME-ICP61,Four acid near total digestion for 34 elements...,POINT (135.95936 -29.48901)
2,395,2421078,DC,Drill core,,,,,,,,,,MINOTAUR EXPLORATION LTD.,TR003,13/10/2007,235676.0,WC07D01,351.0,352.0,1211836,593000.75,6737451.52,53,135.959361,-29.489005,135.959351,-29.488991,1951902,AD07139097,GEOCHEMISTRY,"ALS Chemex, Perth WA",As,<5,ppm,ME-ICP61,Four acid near total digestion for 34 elements...,POINT (135.95936 -29.48901)
3,396,2421078,DC,Drill core,,,,,,,,,,MINOTAUR EXPLORATION LTD.,TR003,13/10/2007,235676.0,WC07D01,351.0,352.0,1211836,593000.75,6737451.52,53,135.959361,-29.489005,135.959351,-29.488991,1951902,AD07139097,GEOCHEMISTRY,"ALS Chemex, Perth WA",Ba,90,ppm,ME-ICP61,Four acid near total digestion for 34 elements...,POINT (135.95936 -29.48901)
4,397,2421078,DC,Drill core,,,,,,,,,,MINOTAUR EXPLORATION LTD.,TR003,13/10/2007,235676.0,WC07D01,351.0,352.0,1211836,593000.75,6737451.52,53,135.959361,-29.489005,135.959351,-29.488991,1951902,AD07139097,GEOCHEMISTRY,"ALS Chemex, Perth WA",Be,0.5,ppm,ME-ICP61,Four acid near total digestion for 34 elements...,POINT (135.95936 -29.48901)


In [30]:
# Tally how many rows each SAMPLE_NO contributes to group XI before filtering.
chemXI.SAMPLE_NO.value_counts()

2602423    44
2602445    44
2603539    44
2603725    44
2603074    44
           ..
2750653     1
2432151     1
2745608     1
2749706     1
2750394     1
Name: SAMPLE_NO, Length: 27009, dtype: int64

In [31]:
# Keep only the samples of group XI that have at least 25 rows.
# The mask selects the SAMPLE_NO values to *keep* rather than negating the rare ones:
# value_counts() ignores missing values, so rows with a NaN SAMPLE_NO never show up in
# `counts` and would pass straight through a negated isin() test. With a positive
# mask those rows are dropped together with the under-sampled ones.
counts = chemXI['SAMPLE_NO'].value_counts()
chemXI = chemXI[chemXI['SAMPLE_NO'].isin(counts[counts >= 25].index)]

In [32]:
# Re-check the per-sample row counts of group XI after the filter.
chemXI.SAMPLE_NO.value_counts()

2602415    44
2603168    44
2603424    44
2603552    44
2603680    44
           ..
2749429    27
2978789    27
2749301    27
2978661    27
2730037    27
Name: SAMPLE_NO, Length: 5969, dtype: int64

## Saving the new reduced geodataframes into csv tables

In [35]:
# Write every reduced group (I-XI) to its own CSV, as the section heading describes.
# Groups I-VI were previously *read back* from disk at this point instead of being
# saved. That replaced the frames held in memory with whatever files happened to
# exist (and fails on a fresh run when they do not), and it passed re-read plain
# tables to the final concatenation next to the in-memory GeoDataFrames of VII-XI.
# NOTE(review): assumes chemI-chemVIII are built and filtered by earlier cells in
# the same way as chemIX-chemXI -- confirm before running.
reduced_groups = [
    chemI, chemII, chemIII, chemIV, chemV, chemVI,
    chemVII, chemVIII, chemIX, chemX, chemXI,
]
for group_no, group_frame in enumerate(reduced_groups, start=1):
    # File names follow the existing scheme: ..._exp<group number>_25elements.csv
    group_frame.to_csv("sarig_rs_chem_exp" + str(group_no) + "_25elements.csv")

## Concatenating the geodataframes and saving as csv

In [36]:
# Stack the eleven reduced groups, I through XI, into one table.
chem = pd.concat(
    objs=[
        chemI, chemII, chemIII, chemIV, chemV, chemVI,
        chemVII, chemVIII, chemIX, chemX, chemXI,
    ]
)

In [37]:
# Write the combined table to a single CSV file.
chem.to_csv(path_or_buf="sarig_rs_chem_exp_25elements.csv")