## Data Cleaning
---
- This notebook converts raw netCDF files into DataFrames for preprocessing.
- Data includes temperature (K), precipitation (inches/month), aerosol optical depth, and NO2 (molecules/cm^2).
- Drops null values, renames columns, and saves final results to CSV files.

In [1]:
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

In [2]:
# Define months to go through within for loop
months_2019 = ['01', '02', '03', '04',
            '05', '06', '07', '08',
            '09', '10', '11', '12']

In [5]:
def clean_temp_data(month, year):
    '''
    This function takes a netCDF of temperature data, drops NaNs, removes 
    unnecessary columns, and saves the final results to a CSV file.
    Inputs: month and year of desired file to be cleaned.
    '''
    inputfile = 'tempdata/' + year + '/temp_' + month + '.nc'
    tempnc = xr.open_dataset(inputfile)
    tempdf = tempnc.to_dataframe()
    print('*'*80)
    print(f'Cleaning temp data set {month}, {year}:')
    print(f'Original data set shape: {tempdf.shape}')
    tempdf.dropna(inplace = True)
    print(f'Data set shape after dropping NaNs: {tempdf.shape}')
    tempdf.reset_index(inplace = True)
    tempdf.drop(columns = ['latv', 'lonv', 'lat_bnds', 'lon_bnds', 'shape_mask'], inplace = True)
    tempdf.rename(columns={'MYD11C3_006_LST_Day_CMG':'temp'}, inplace = True)
    outputfile = 'tempdata_cleaned/' + year + '/temp_' + month + '.csv'
    #tempdf.to_csv(outputfile)
    display(tempdf.head())

In [7]:
for month in months_2019:
    clean_temp_data(month, '2019')

********************************************************************************
Cleaning temp data set 01, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823568, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,307.0
1,18.925,-155.725,307.0
2,18.925,-155.725,307.0
3,18.925,-155.725,307.0
4,18.925,-155.675,306.959991


********************************************************************************
Cleaning temp data set 02, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823588, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,304.940002
1,18.925,-155.725,304.940002
2,18.925,-155.725,304.940002
3,18.925,-155.725,304.940002
4,18.925,-155.675,304.940002


********************************************************************************
Cleaning temp data set 03, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823704, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,304.440002
1,18.925,-155.725,304.440002
2,18.925,-155.725,304.440002
3,18.925,-155.725,304.440002
4,18.925,-155.675,304.440002


********************************************************************************
Cleaning temp data set 04, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823680, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,304.399994
1,18.925,-155.725,304.399994
2,18.925,-155.725,304.399994
3,18.925,-155.725,304.399994
4,18.925,-155.675,304.73999


********************************************************************************
Cleaning temp data set 05, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823676, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,308.039978
1,18.925,-155.725,308.039978
2,18.925,-155.725,308.039978
3,18.925,-155.725,308.039978
4,18.925,-155.675,308.380005


********************************************************************************
Cleaning temp data set 06, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823672, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,311.5
1,18.925,-155.725,311.5
2,18.925,-155.725,311.5
3,18.925,-155.725,311.5
4,18.925,-155.675,311.019989


********************************************************************************
Cleaning temp data set 07, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823612, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,309.579987
1,18.925,-155.725,309.579987
2,18.925,-155.725,309.579987
3,18.925,-155.725,309.579987
4,18.925,-155.675,309.579987


********************************************************************************
Cleaning temp data set 08, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823688, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,310.119995
1,18.925,-155.725,310.119995
2,18.925,-155.725,310.119995
3,18.925,-155.725,310.119995
4,18.925,-155.675,310.119995


********************************************************************************
Cleaning temp data set 09, 2021:
Original data set shape: (30217648, 4)
Data set shape after dropping NaNs: (1823720, 4)


Unnamed: 0,lat,lon,temp
0,18.925,-155.725,308.220001
1,18.925,-155.725,308.220001
2,18.925,-155.725,308.220001
3,18.925,-155.725,308.220001
4,18.925,-155.675,308.220001


In [8]:
def clean_precip_data(month, year):
    '''
    This function takes a netCDF of precipitation data, drops NaNs, removes 
    unnecessary columns, and saves the final results to a CSV file.
    Inputs: month and year of desired file to be cleaned.
    '''
    inputfile = 'precipdata/' + year + '/precip_' + month + '.nc'
    precipnc = xr.open_dataset(inputfile)
    precipdf = precipnc.to_dataframe()
    print('*'*80)
    print(f'Cleaning precip data set {month}, {year}:')
    print(f'Original data set shape: {precipdf.shape}')
    precipdf.dropna(inplace = True)
    print(f'Data set shape after dropping NaNs: {precipdf.shape}')
    precipdf.reset_index(inplace = True)
    precipdf.drop(columns = ['latv', 'lonv', 'lat_bnds', 'lon_bnds', 'shape_mask'], inplace = True)
    precipdf.rename(columns={'GPM_3IMERGM_06_precipitation':'precip'}, inplace = True)
    outputfile = 'precipdata_cleaned/' + year + '/precip_' + month + '.csv'
    #precipdf.to_csv(outputfile)
    display(precipdf.head())

In [9]:
for month in months_2019:
    clean_precip_data(month, '2019')

********************************************************************************
Cleaning precip data set 05, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462564, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.685019
1,18.950001,-155.75,0.685019
2,18.950001,-155.75,0.685019
3,18.950001,-155.75,0.685019
4,18.950001,-155.649994,0.516747


********************************************************************************
Cleaning precip data set 06, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462732, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.126139
1,18.950001,-155.75,0.126139
2,18.950001,-155.75,0.126139
3,18.950001,-155.75,0.126139
4,18.950001,-155.649994,0.346249


********************************************************************************
Cleaning precip data set 07, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462772, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.102647
1,18.950001,-155.75,0.102647
2,18.950001,-155.75,0.102647
3,18.950001,-155.75,0.102647
4,18.950001,-155.649994,0.118458


********************************************************************************
Cleaning precip data set 08, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462832, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,2.253564
1,18.950001,-155.75,2.253564
2,18.950001,-155.75,2.253564
3,18.950001,-155.75,2.253564
4,18.950001,-155.649994,2.569436


********************************************************************************
Cleaning precip data set 09, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462848, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.364265
1,18.950001,-155.75,0.364265
2,18.950001,-155.75,0.364265
3,18.950001,-155.75,0.364265
4,18.950001,-155.649994,0.16396


********************************************************************************
Cleaning precip data set 10, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462796, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,1.43228
1,18.950001,-155.75,1.43228
2,18.950001,-155.75,1.43228
3,18.950001,-155.75,1.43228
4,18.950001,-155.649994,1.094009


********************************************************************************
Cleaning precip data set 11, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462608, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.345436
1,18.950001,-155.75,0.345436
2,18.950001,-155.75,0.345436
3,18.950001,-155.75,0.345436
4,18.950001,-155.649994,0.349981


********************************************************************************
Cleaning precip data set 12, 2018:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462348, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.073917
1,18.950001,-155.75,0.073917
2,18.950001,-155.75,0.073917
3,18.950001,-155.75,0.073917
4,18.950001,-155.649994,0.131762


********************************************************************************
Cleaning precip data set 01, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462212, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,3.448825
1,18.950001,-155.75,3.448825
2,18.950001,-155.75,3.448825
3,18.950001,-155.75,3.448825
4,18.950001,-155.649994,3.189152


********************************************************************************
Cleaning precip data set 02, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462236, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.540363
1,18.950001,-155.75,0.540363
2,18.950001,-155.75,0.540363
3,18.950001,-155.75,0.540363
4,18.950001,-155.649994,0.654443


********************************************************************************
Cleaning precip data set 03, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (461424, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,4.854724
1,18.950001,-155.75,4.854724
2,18.950001,-155.75,4.854724
3,18.950001,-155.75,4.854724
4,18.950001,-155.649994,4.516256


********************************************************************************
Cleaning precip data set 04, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462236, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.503061
1,18.950001,-155.75,0.503061
2,18.950001,-155.75,0.503061
3,18.950001,-155.75,0.503061
4,18.950001,-155.649994,0.542465


********************************************************************************
Cleaning precip data set 05, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462400, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.605416
1,18.950001,-155.75,0.605416
2,18.950001,-155.75,0.605416
3,18.950001,-155.75,0.605416
4,18.950001,-155.649994,0.494475


********************************************************************************
Cleaning precip data set 06, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462656, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.129914
1,18.950001,-155.75,0.129914
2,18.950001,-155.75,0.129914
3,18.950001,-155.75,0.129914
4,18.950001,-155.649994,0.131125


********************************************************************************
Cleaning precip data set 07, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462788, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.097707
1,18.950001,-155.75,0.097707
2,18.950001,-155.75,0.097707
3,18.950001,-155.75,0.097707
4,18.950001,-155.649994,0.064185


********************************************************************************
Cleaning precip data set 08, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462848, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.16308
1,18.950001,-155.75,0.16308
2,18.950001,-155.75,0.16308
3,18.950001,-155.75,0.16308
4,18.950001,-155.649994,0.108149


********************************************************************************
Cleaning precip data set 09, 2021:
Original data set shape: (7569828, 4)
Data set shape after dropping NaNs: (462836, 4)


Unnamed: 0,lat,lon,precip
0,18.950001,-155.75,0.458449
1,18.950001,-155.75,0.458449
2,18.950001,-155.75,0.458449
3,18.950001,-155.75,0.458449
4,18.950001,-155.649994,0.379891


In [10]:
def clean_AOD_data(month, year):
    '''
    This function takes a netCDF of AOD data, drops NaNs, removes 
    unnecessary columns, and saves the final results to a CSV file.
    Inputs: month and year of desired file to be cleaned.
    '''
    inputfile = 'AODdata/' + year + '/AOD_' + month + '.nc'
    AODnc = xr.open_dataset(inputfile)
    AODdf = AODnc.to_dataframe()
    print('*'*80)
    print(f'Cleaning AOD data set {month}, {year}:')
    print(f'Original data set shape: {AODdf.shape}')
    AODdf = AODdf[AODdf['AOD_550_AVG'] != -9.0]
    print(f'Data set shape after dropping -9.0 values: {AODdf.shape}')
    AODdf.reset_index(inplace = True)
    AODdf.drop(columns = ['ydim', 'xdim', 'Number_Of_Days', 'AOD_550_MIN', 'AOD_550_MAX', 'AOD_550_MED', 'AOD_550_STD'], inplace = True)
    outputfile = 'AODdata_cleaned/' + year + '/AOD_' + month + '.csv'
    #AODdf.to_csv(outputfile)
    display(AODdf.head())

In [11]:
for month in months_2019:
    clean_AOD_data(month, '2019')

********************************************************************************
Cleaning AOD data set 05, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3324459, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-91.0,-63.299999,0.033
1,-176.5,-63.199997,0.043
2,-91.0,-63.199997,0.033
3,-90.299995,-63.199997,0.027
4,-177.100006,-63.099998,0.051


********************************************************************************
Cleaning AOD data set 06, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3960484, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-153.899994,-56.200001,0.036
1,165.800018,-56.200001,0.051
2,-153.899994,-56.099998,0.045
3,-66.5,-56.099998,0.008
4,-65.799995,-56.099998,0.026


********************************************************************************
Cleaning AOD data set 07, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4152866, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,159.100006,-60.199997,0.043
1,-68.099998,-60.099998,0.013
2,159.100006,-60.099998,0.041
3,-68.099998,-60.0,0.016
4,-67.299995,-60.0,0.017


********************************************************************************
Cleaning AOD data set 08, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4321445, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,116.800018,-66.599998,0.054
1,-167.199997,-66.099998,0.025
2,-166.800003,-66.099998,0.023
3,-166.399994,-66.099998,0.023
4,-166.899994,-66.0,0.031


********************************************************************************
Cleaning AOD data set 09, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4471527, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-137.100006,-74.900002,0.029
1,-137.300003,-74.800003,0.017
2,-137.399994,-74.699997,0.028
3,-138.0,-74.599998,0.043
4,-137.600006,-74.599998,0.036


********************************************************************************
Cleaning AOD data set 10, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4210588, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-172.199997,-84.099998,0.319
1,-171.399994,-84.099998,0.343
2,-172.899994,-84.0,0.336
3,-172.899994,-83.900002,0.252
4,-172.300003,-83.900002,0.345


********************************************************************************
Cleaning AOD data set 11, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3847383, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,176.300018,-82.900002,0.393
1,177.0,-82.900002,0.329
2,177.899994,-82.900002,0.388
3,176.0,-82.800003,0.367
4,177.600006,-82.800003,0.322


********************************************************************************
Cleaning AOD data set 12, 2018:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3765140, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-179.100006,-82.699997,0.528
1,-179.600006,-82.599998,0.5
2,-173.0,-81.5,0.682
3,-172.899994,-81.5,0.487
4,-173.600006,-81.400002,0.576


********************************************************************************
Cleaning AOD data set 01, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3778455, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,174.0,-82.599998,0.813
1,174.200012,-82.5,0.82
2,-173.600006,-81.300003,0.635
3,-163.899994,-78.699997,0.0695
4,-163.699997,-78.699997,0.0895


********************************************************************************
Cleaning AOD data set 02, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3884122, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-166.600006,-82.199997,1.046
1,-169.199997,-81.400002,0.839
2,-164.300003,-78.699997,0.083
3,-164.199997,-78.699997,0.148
4,-164.0,-78.699997,0.06


********************************************************************************
Cleaning AOD data set 03, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4044612, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-168.199997,-78.400002,0.117
1,-167.699997,-78.400002,0.104
2,-167.399994,-78.400002,0.098
3,-167.0,-78.400002,0.114
4,-166.800003,-78.400002,0.121


********************************************************************************
Cleaning AOD data set 04, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4049589, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-76.0,-72.199997,0.012
1,-85.400002,-72.099998,0.015
2,-84.900002,-72.099998,0.021
3,-84.799995,-72.099998,0.022
4,-76.699997,-72.099998,0.01


********************************************************************************
Cleaning AOD data set 05, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4012749, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-16.099991,-63.099998,0.029
1,46.300003,-63.099998,0.033
2,46.900009,-63.099998,0.045
3,-16.099991,-63.0,0.029
4,46.900009,-63.0,0.049


********************************************************************************
Cleaning AOD data set 06, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4010343, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,176.100006,-56.5,0.018
1,176.899994,-56.5,0.015
2,-95.099998,-56.200001,0.058
3,-94.400002,-56.200001,0.065
4,-71.099998,-56.200001,0.0135


********************************************************************************
Cleaning AOD data set 07, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4172355, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-164.5,-60.299999,0.056
1,-163.699997,-60.299999,0.026
2,-51.0,-60.299999,0.091
3,-50.199997,-60.299999,0.099
4,-49.599991,-60.299999,0.11


********************************************************************************
Cleaning AOD data set 08, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (4398744, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-69.0,-67.900002,0.045
1,-68.5,-67.800003,0.042
2,-68.0,-67.699997,0.043
3,-68.099998,-67.599998,0.041
4,-104.900002,-66.800003,0.023


********************************************************************************
Cleaning AOD data set 09, 2021:
Original data set shape: (6485401, 8)
Data set shape after dropping -9.0 values: (3777485, 8)


Unnamed: 0,Longitude,Latitude,AOD_550_AVG
0,-134.199997,-71.0,0.073
1,-133.199997,-70.599998,0.116
2,-133.300003,-70.5,0.164
3,-72.799995,-68.900002,0.059
4,-91.799995,-67.699997,0.069


In [14]:
def clean_NO2_data(month, year):
    '''
    This function takes a netCDF of NO2 data, drops NaNs, lowers
    the resolution of the data from 0.01 to 0.01 deg, removes 
    unnecessary columns, and saves the final results to a CSV file.
    Inputs: month and year of desired file to be cleaned.
    '''
    inputfile = 'NO2data/' + year + '/NO2_' + month + '.nc'
    NO2nc = xr.open_dataset(inputfile)
    NO2df = NO2nc.to_dataframe()
    print('*'*80)
    print(f'Cleaning NO2 data set {month}, {year}:')
    print(f'Original data set shape: {NO2df.shape}')
    NO2df.dropna(inplace = True)
    print(f'Data set shape after dropping NaNs: {NO2df.shape}')
    # NO2 data is much finer resolution, so we are reducing the resolution to be on par with the other data sets
    NO2df = NO2df.iloc[::10, :]
    NO2df.reset_index(inplace = True)
    NO2df.drop(columns = ['y', 'x', 'num'], inplace = True)
    outputfile = 'NO2data_cleaned/' + year + '/NO2_' + month + '.csv'
    #NO2df.to_csv(outputfile)
    display(NO2df.head())

In [15]:
for month in months_2019:
    clean_NO2_data(month, '2019')

********************************************************************************
Cleaning NO2 data set 05, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14474246, 4)


Unnamed: 0,NO2,lat,lon
0,734662100000000.0,24.5,-124.739998
1,533230700000000.0,24.5,-124.639999
2,652221000000000.0,24.5,-124.540001
3,582202000000000.0,24.5,-124.440002
4,446760200000000.0,24.5,-124.339996


********************************************************************************
Cleaning NO2 data set 06, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14493235, 4)


Unnamed: 0,NO2,lat,lon
0,471030800000000.0,24.5,-124.75
1,402326000000000.0,24.5,-124.650002
2,518922100000000.0,24.5,-124.550003
3,471953500000000.0,24.5,-124.449997
4,403346500000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 07, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14493962, 4)


Unnamed: 0,NO2,lat,lon
0,165720700000000.0,24.5,-124.75
1,413195900000000.0,24.5,-124.650002
2,380370200000000.0,24.5,-124.550003
3,505309300000000.0,24.5,-124.449997
4,336551000000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 08, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14493943, 4)


Unnamed: 0,NO2,lat,lon
0,204564700000000.0,24.5,-124.75
1,143247800000000.0,24.5,-124.650002
2,346782100000000.0,24.5,-124.550003
3,410694800000000.0,24.5,-124.449997
4,533701100000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 09, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14491739, 4)


Unnamed: 0,NO2,lat,lon
0,370943600000000.0,24.5,-124.75
1,243639600000000.0,24.5,-124.620003
2,479719700000000.0,24.5,-124.519997
3,388596600000000.0,24.5,-124.419998
4,522548000000000.0,24.5,-124.32


********************************************************************************
Cleaning NO2 data set 10, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14428529, 4)


Unnamed: 0,NO2,lat,lon
0,691916100000000.0,24.5,-124.75
1,474911800000000.0,24.5,-124.650002
2,374266800000000.0,24.5,-124.550003
3,567357900000000.0,24.5,-124.449997
4,537558200000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 11, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (13644208, 4)


Unnamed: 0,NO2,lat,lon
0,628090400000000.0,24.5,-124.75
1,195824800000000.0,24.5,-124.650002
2,464786800000000.0,24.5,-124.550003
3,476011500000000.0,24.5,-124.449997
4,383888800000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 12, 2018:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (12894822, 4)


Unnamed: 0,NO2,lat,lon
0,295646600000000.0,24.5,-124.75
1,433096100000000.0,24.5,-124.650002
2,521630400000000.0,24.5,-124.550003
3,206608800000000.0,24.5,-124.449997
4,138343100000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 01, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (12548676, 4)


Unnamed: 0,NO2,lat,lon
0,452586900000000.0,24.5,-124.75
1,489981000000000.0,24.5,-124.650002
2,618429300000000.0,24.5,-124.550003
3,452125700000000.0,24.5,-124.449997
4,556172400000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 02, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (12709314, 4)


Unnamed: 0,NO2,lat,lon
0,689687100000000.0,24.5,-124.75
1,606150700000000.0,24.5,-124.650002
2,650518800000000.0,24.5,-124.550003
3,389072900000000.0,24.5,-124.449997
4,888987600000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 03, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14114859, 4)


Unnamed: 0,NO2,lat,lon
0,497627800000000.0,24.5,-124.75
1,461601000000000.0,24.5,-124.650002
2,775651000000000.0,24.5,-124.550003
3,626566500000000.0,24.5,-124.449997
4,596938900000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 04, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14451231, 4)


Unnamed: 0,NO2,lat,lon
0,608287800000000.0,24.5,-124.75
1,311194300000000.0,24.5,-124.650002
2,524613500000000.0,24.5,-124.550003
3,350177900000000.0,24.5,-124.449997
4,413620300000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 05, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14480345, 4)


Unnamed: 0,NO2,lat,lon
0,314422400000000.0,24.5,-124.75
1,339829500000000.0,24.5,-124.650002
2,547063000000000.0,24.5,-124.550003
3,626062400000000.0,24.5,-124.449997
4,502616600000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 06, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14491464, 4)


Unnamed: 0,NO2,lat,lon
0,289374600000000.0,24.5,-124.75
1,167964200000000.0,24.5,-124.650002
2,597543300000000.0,24.5,-124.550003
3,330011200000000.0,24.5,-124.449997
4,422195400000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 07, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14496937, 4)


Unnamed: 0,NO2,lat,lon
0,293861800000000.0,24.5,-124.75
1,453056000000000.0,24.5,-124.650002
2,510474900000000.0,24.5,-124.550003
3,570648000000000.0,24.5,-124.449997
4,381836500000000.0,24.5,-124.349998


********************************************************************************
Cleaning NO2 data set 08, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14497256, 4)


Unnamed: 0,NO2,lat,lon
0,568676200000000.0,24.5,-124.739998
1,357033800000000.0,24.5,-124.639999
2,565147500000000.0,24.5,-124.540001
3,548225900000000.0,24.5,-124.440002
4,562515100000000.0,24.5,-124.339996


********************************************************************************
Cleaning NO2 data set 09, 2021:
Original data set shape: (14500000, 4)
Data set shape after dropping NaNs: (14496794, 4)


Unnamed: 0,NO2,lat,lon
0,508055400000000.0,24.5,-124.75
1,502508000000000.0,24.5,-124.650002
2,389230500000000.0,24.5,-124.550003
3,99782190000000.0,24.5,-124.449997
4,150218400000000.0,24.5,-124.349998
